Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
389 lines
6.8 KiB
389 lines
6.8 KiB
package snowballstem |
|
|
|
import ( |
|
"log" |
|
"strings" |
|
"unicode/utf8" |
|
) |
|
|
|
// Env represents the Snowball execution environment |
|
type Env struct { |
|
current string |
|
Cursor int |
|
Limit int |
|
LimitBackward int |
|
Bra int |
|
Ket int |
|
} |
|
|
|
// NewEnv creates a new Snowball execution environment on the provided string |
|
func NewEnv(val string) *Env { |
|
return &Env{ |
|
current: val, |
|
Cursor: 0, |
|
Limit: len(val), |
|
LimitBackward: 0, |
|
Bra: 0, |
|
Ket: len(val), |
|
} |
|
} |
|
|
|
func (env *Env) Current() string { |
|
return env.current |
|
} |
|
|
|
func (env *Env) SetCurrent(s string) { |
|
env.current = s |
|
env.Cursor = 0 |
|
env.Limit = len(s) |
|
env.LimitBackward = 0 |
|
env.Bra = 0 |
|
env.Ket = len(s) |
|
} |
|
|
|
func (env *Env) ReplaceS(bra, ket int, s string) int32 { |
|
adjustment := int32(len(s)) - (int32(ket) - int32(bra)) |
|
result, _ := splitAt(env.current, bra) |
|
rsplit := ket |
|
if ket < bra { |
|
rsplit = bra |
|
} |
|
_, rhs := splitAt(env.current, rsplit) |
|
result += s |
|
result += rhs |
|
|
|
newLim := int32(env.Limit) + adjustment |
|
env.Limit = int(newLim) |
|
|
|
if env.Cursor >= ket { |
|
newCur := int32(env.Cursor) + adjustment |
|
env.Cursor = int(newCur) |
|
} else if env.Cursor > bra { |
|
env.Cursor = bra |
|
} |
|
|
|
env.current = result |
|
return adjustment |
|
} |
|
|
|
func (env *Env) EqS(s string) bool { |
|
if env.Cursor >= env.Limit { |
|
return false |
|
} |
|
|
|
if strings.HasPrefix(env.current[env.Cursor:], s) { |
|
env.Cursor += len(s) |
|
for !onCharBoundary(env.current, env.Cursor) { |
|
env.Cursor++ |
|
} |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (env *Env) EqSB(s string) bool { |
|
if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) { |
|
return false |
|
} else if !onCharBoundary(env.current, env.Cursor-len(s)) || |
|
!strings.HasPrefix(env.current[env.Cursor-len(s):], s) { |
|
return false |
|
} else { |
|
env.Cursor -= len(s) |
|
return true |
|
} |
|
} |
|
|
|
func (env *Env) SliceFrom(s string) bool { |
|
bra, ket := env.Bra, env.Ket |
|
env.ReplaceS(bra, ket, s) |
|
return true |
|
} |
|
|
|
func (env *Env) NextChar() { |
|
env.Cursor++ |
|
for !onCharBoundary(env.current, env.Cursor) { |
|
env.Cursor++ |
|
} |
|
} |
|
|
|
func (env *Env) PrevChar() { |
|
env.Cursor-- |
|
for !onCharBoundary(env.current, env.Cursor) { |
|
env.Cursor-- |
|
} |
|
} |
|
|
|
func (env *Env) ByteIndexForHop(delta int32) int32 { |
|
if delta > 0 { |
|
res := env.Cursor |
|
for delta > 0 { |
|
res++ |
|
delta-- |
|
for res <= len(env.current) && !onCharBoundary(env.current, res) { |
|
res++ |
|
} |
|
} |
|
return int32(res) |
|
} else if delta < 0 { |
|
res := env.Cursor |
|
for delta < 0 { |
|
res-- |
|
delta++ |
|
for res >= 0 && !onCharBoundary(env.current, res) { |
|
res-- |
|
} |
|
} |
|
return int32(res) |
|
} else { |
|
return int32(env.Cursor) |
|
} |
|
} |
|
|
|
func (env *Env) InGrouping(chars []byte, min, max int32) bool { |
|
if env.Cursor >= env.Limit { |
|
return false |
|
} |
|
|
|
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:]) |
|
if r != utf8.RuneError { |
|
if r > max || r < min { |
|
return false |
|
} |
|
r -= min |
|
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 { |
|
return false |
|
} |
|
env.NextChar() |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (env *Env) InGroupingB(chars []byte, min, max int32) bool { |
|
if env.Cursor <= env.LimitBackward { |
|
return false |
|
} |
|
env.PrevChar() |
|
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:]) |
|
if r != utf8.RuneError { |
|
env.NextChar() |
|
if r > max || r < min { |
|
return false |
|
} |
|
r -= min |
|
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 { |
|
return false |
|
} |
|
env.PrevChar() |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (env *Env) OutGrouping(chars []byte, min, max int32) bool { |
|
if env.Cursor >= env.Limit { |
|
return false |
|
} |
|
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:]) |
|
if r != utf8.RuneError { |
|
if r > max || r < min { |
|
env.NextChar() |
|
return true |
|
} |
|
r -= min |
|
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 { |
|
env.NextChar() |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
func (env *Env) OutGroupingB(chars []byte, min, max int32) bool { |
|
if env.Cursor <= env.LimitBackward { |
|
return false |
|
} |
|
env.PrevChar() |
|
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:]) |
|
if r != utf8.RuneError { |
|
env.NextChar() |
|
if r > max || r < min { |
|
env.PrevChar() |
|
return true |
|
} |
|
r -= min |
|
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 { |
|
env.PrevChar() |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
func (env *Env) SliceDel() bool { |
|
return env.SliceFrom("") |
|
} |
|
|
|
func (env *Env) Insert(bra, ket int, s string) { |
|
adjustment := env.ReplaceS(bra, ket, s) |
|
if bra <= env.Bra { |
|
env.Bra = int(int32(env.Bra) + adjustment) |
|
} |
|
if bra <= env.Ket { |
|
env.Ket = int(int32(env.Ket) + adjustment) |
|
} |
|
} |
|
|
|
func (env *Env) SliceTo() string { |
|
return env.current[env.Bra:env.Ket] |
|
} |
|
|
|
func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 { |
|
var i int32 |
|
j := int32(len(amongs)) |
|
|
|
c := env.Cursor |
|
l := env.Limit |
|
|
|
var commonI, commonJ int |
|
|
|
firstKeyInspected := false |
|
for { |
|
k := i + ((j - i) >> 1) |
|
var diff int32 |
|
common := min(commonI, commonJ) |
|
w := amongs[k] |
|
for lvar := common; lvar < len(w.Str); lvar++ { |
|
if c+common == l { |
|
diff-- |
|
break |
|
} |
|
diff = int32(env.current[c+common]) - int32(w.Str[lvar]) |
|
if diff != 0 { |
|
break |
|
} |
|
common++ |
|
} |
|
if diff < 0 { |
|
j = k |
|
commonJ = common |
|
} else { |
|
i = k |
|
commonI = common |
|
} |
|
if j-i <= 1 { |
|
if i > 0 { |
|
break |
|
} |
|
if j == i { |
|
break |
|
} |
|
if firstKeyInspected { |
|
break |
|
} |
|
firstKeyInspected = true |
|
} |
|
} |
|
|
|
for { |
|
w := amongs[i] |
|
if commonI >= len(w.Str) { |
|
env.Cursor = c + len(w.Str) |
|
if w.F != nil { |
|
res := w.F(env, ctx) |
|
env.Cursor = c + len(w.Str) |
|
if res { |
|
return w.B |
|
} |
|
} else { |
|
return w.B |
|
} |
|
} |
|
i = w.A |
|
if i < 0 { |
|
return 0 |
|
} |
|
} |
|
} |
|
|
|
func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 { |
|
var i int32 |
|
j := int32(len(amongs)) |
|
|
|
c := env.Cursor |
|
lb := env.LimitBackward |
|
|
|
var commonI, commonJ int |
|
|
|
firstKeyInspected := false |
|
|
|
for { |
|
k := i + ((j - i) >> 1) |
|
diff := int32(0) |
|
common := min(commonI, commonJ) |
|
w := amongs[k] |
|
for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- { |
|
if c-common == lb { |
|
diff-- |
|
break |
|
} |
|
diff = int32(env.current[c-common-1]) - int32(w.Str[lvar]) |
|
if diff != 0 { |
|
break |
|
} |
|
// Count up commons. But not one character but the byte width of that char |
|
common++ |
|
} |
|
if diff < 0 { |
|
j = k |
|
commonJ = common |
|
} else { |
|
i = k |
|
commonI = common |
|
} |
|
if j-i <= 1 { |
|
if i > 0 { |
|
break |
|
} |
|
if j == i { |
|
break |
|
} |
|
if firstKeyInspected { |
|
break |
|
} |
|
firstKeyInspected = true |
|
} |
|
} |
|
for { |
|
w := amongs[i] |
|
if commonI >= len(w.Str) { |
|
env.Cursor = c - len(w.Str) |
|
if w.F != nil { |
|
res := w.F(env, ctx) |
|
env.Cursor = c - len(w.Str) |
|
if res { |
|
return w.B |
|
} |
|
} else { |
|
return w.B |
|
} |
|
} |
|
i = w.A |
|
if i < 0 { |
|
return 0 |
|
} |
|
} |
|
} |
|
|
|
func (env *Env) Debug(count, lineNumber int) { |
|
log.Printf("snowball debug, count: %d, line: %d", count, lineNumber) |
|
} |
|
|
|
func (env *Env) Clone() *Env { |
|
clone := *env |
|
return &clone |
|
} |
|
|
|
func (env *Env) AssignTo() string { |
|
return env.Current() |
|
}
|
|
|