Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
347 lines
8.6 KiB
347 lines
8.6 KiB
package regexp2 |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
) |
|
|
|
// Match is a single regex result match that contains groups and repeated captures |
|
// -Groups |
|
// -Capture |
|
type Match struct { |
|
Group //embeded group 0 |
|
|
|
regex *Regexp |
|
otherGroups []Group |
|
|
|
// input to the match |
|
textpos int |
|
textstart int |
|
|
|
capcount int |
|
caps []int |
|
sparseCaps map[int]int |
|
|
|
// output from the match |
|
matches [][]int |
|
matchcount []int |
|
|
|
// whether we've done any balancing with this match. If we |
|
// have done balancing, we'll need to do extra work in Tidy(). |
|
balancing bool |
|
} |
|
|
|
// Group is an explicit or implit (group 0) matched group within the pattern |
|
type Group struct { |
|
Capture // the last capture of this group is embeded for ease of use |
|
|
|
Name string // group name |
|
Captures []Capture // captures of this group |
|
} |
|
|
|
// Capture is a single capture of text within the larger original string.
type Capture struct {
	// text is the original string, as runes.
	text []rune

	// Index is the position in the original string where the first character
	// of the captured substring was found.
	Index int
	// Length is the length of the captured substring.
	Length int
}

// String returns the captured text as a string.
func (c *Capture) String() string {
	return string(c.Runes())
}

// Runes returns the captured text as a rune slice. The result is a sub-slice
// of the original text, not a copy.
func (c *Capture) Runes() []rune {
	end := c.Index + c.Length
	return c.text[c.Index:end]
}
|
|
|
func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match { |
|
m := Match{ |
|
regex: regex, |
|
matchcount: make([]int, capcount), |
|
matches: make([][]int, capcount), |
|
textstart: startpos, |
|
balancing: false, |
|
} |
|
m.Name = "0" |
|
m.text = text |
|
m.matches[0] = make([]int, 2) |
|
return &m |
|
} |
|
|
|
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match { |
|
m := newMatch(regex, capcount, text, startpos) |
|
m.sparseCaps = caps |
|
return m |
|
} |
|
|
|
func (m *Match) reset(text []rune, textstart int) { |
|
m.text = text |
|
m.textstart = textstart |
|
for i := 0; i < len(m.matchcount); i++ { |
|
m.matchcount[i] = 0 |
|
} |
|
m.balancing = false |
|
} |
|
|
|
func (m *Match) tidy(textpos int) { |
|
|
|
interval := m.matches[0] |
|
m.Index = interval[0] |
|
m.Length = interval[1] |
|
m.textpos = textpos |
|
m.capcount = m.matchcount[0] |
|
//copy our root capture to the list |
|
m.Group.Captures = []Capture{m.Group.Capture} |
|
|
|
if m.balancing { |
|
// The idea here is that we want to compact all of our unbalanced captures. To do that we |
|
// use j basically as a count of how many unbalanced captures we have at any given time |
|
// (really j is an index, but j/2 is the count). First we skip past all of the real captures |
|
// until we find a balance captures. Then we check each subsequent entry. If it's a balance |
|
// capture (it's negative), we decrement j. If it's a real capture, we increment j and copy |
|
// it down to the last free position. |
|
for cap := 0; cap < len(m.matchcount); cap++ { |
|
limit := m.matchcount[cap] * 2 |
|
matcharray := m.matches[cap] |
|
|
|
var i, j int |
|
|
|
for i = 0; i < limit; i++ { |
|
if matcharray[i] < 0 { |
|
break |
|
} |
|
} |
|
|
|
for j = i; i < limit; i++ { |
|
if matcharray[i] < 0 { |
|
// skip negative values |
|
j-- |
|
} else { |
|
// but if we find something positive (an actual capture), copy it back to the last |
|
// unbalanced position. |
|
if i != j { |
|
matcharray[j] = matcharray[i] |
|
} |
|
j++ |
|
} |
|
} |
|
|
|
m.matchcount[cap] = j / 2 |
|
} |
|
|
|
m.balancing = false |
|
} |
|
} |
|
|
|
// isMatched tells if a group was matched by capnum |
|
func (m *Match) isMatched(cap int) bool { |
|
return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1) |
|
} |
|
|
|
// matchIndex returns the index of the last specified matched group by capnum |
|
func (m *Match) matchIndex(cap int) int { |
|
i := m.matches[cap][m.matchcount[cap]*2-2] |
|
if i >= 0 { |
|
return i |
|
} |
|
|
|
return m.matches[cap][-3-i] |
|
} |
|
|
|
// matchLength returns the length of the last specified matched group by capnum |
|
func (m *Match) matchLength(cap int) int { |
|
i := m.matches[cap][m.matchcount[cap]*2-1] |
|
if i >= 0 { |
|
return i |
|
} |
|
|
|
return m.matches[cap][-3-i] |
|
} |
|
|
|
// Nonpublic builder: add a capture to the group specified by "c" |
|
func (m *Match) addMatch(c, start, l int) { |
|
|
|
if m.matches[c] == nil { |
|
m.matches[c] = make([]int, 2) |
|
} |
|
|
|
capcount := m.matchcount[c] |
|
|
|
if capcount*2+2 > len(m.matches[c]) { |
|
oldmatches := m.matches[c] |
|
newmatches := make([]int, capcount*8) |
|
copy(newmatches, oldmatches[:capcount*2]) |
|
m.matches[c] = newmatches |
|
} |
|
|
|
m.matches[c][capcount*2] = start |
|
m.matches[c][capcount*2+1] = l |
|
m.matchcount[c] = capcount + 1 |
|
//log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches) |
|
} |
|
|
|
// Nonpublic builder: Add a capture to balance the specified group. This is used by the |
|
// balanced match construct. (?<foo-foo2>...) |
|
// |
|
// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c). |
|
// However, since we have backtracking, we need to keep track of everything. |
|
func (m *Match) balanceMatch(c int) { |
|
m.balancing = true |
|
|
|
// we'll look at the last capture first |
|
capcount := m.matchcount[c] |
|
target := capcount*2 - 2 |
|
|
|
// first see if it is negative, and therefore is a reference to the next available |
|
// capture group for balancing. If it is, we'll reset target to point to that capture. |
|
if m.matches[c][target] < 0 { |
|
target = -3 - m.matches[c][target] |
|
} |
|
|
|
// move back to the previous capture |
|
target -= 2 |
|
|
|
// if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. |
|
if target >= 0 && m.matches[c][target] < 0 { |
|
m.addMatch(c, m.matches[c][target], m.matches[c][target+1]) |
|
} else { |
|
m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */) |
|
} |
|
} |
|
|
|
// Nonpublic builder: removes a group match by capnum |
|
func (m *Match) removeMatch(c int) { |
|
m.matchcount[c]-- |
|
} |
|
|
|
// GroupCount returns the number of groups this match has matched |
|
func (m *Match) GroupCount() int { |
|
return len(m.matchcount) |
|
} |
|
|
|
// GroupByName returns a group based on the name of the group, or nil if the group name does not exist |
|
func (m *Match) GroupByName(name string) *Group { |
|
num := m.regex.GroupNumberFromName(name) |
|
if num < 0 { |
|
return nil |
|
} |
|
return m.GroupByNumber(num) |
|
} |
|
|
|
// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist |
|
func (m *Match) GroupByNumber(num int) *Group { |
|
// check our sparse map |
|
if m.sparseCaps != nil { |
|
if newNum, ok := m.sparseCaps[num]; ok { |
|
num = newNum |
|
} |
|
} |
|
if num >= len(m.matchcount) || num < 0 { |
|
return nil |
|
} |
|
|
|
if num == 0 { |
|
return &m.Group |
|
} |
|
|
|
m.populateOtherGroups() |
|
|
|
return &m.otherGroups[num-1] |
|
} |
|
|
|
// Groups returns all the capture groups, starting with group 0 (the full match) |
|
func (m *Match) Groups() []Group { |
|
m.populateOtherGroups() |
|
g := make([]Group, len(m.otherGroups)+1) |
|
g[0] = m.Group |
|
copy(g[1:], m.otherGroups) |
|
return g |
|
} |
|
|
|
func (m *Match) populateOtherGroups() { |
|
// Construct all the Group objects first time called |
|
if m.otherGroups == nil { |
|
m.otherGroups = make([]Group, len(m.matchcount)-1) |
|
for i := 0; i < len(m.otherGroups); i++ { |
|
m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1]) |
|
} |
|
} |
|
} |
|
|
|
func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) { |
|
c := m.matchcount[groupnum] |
|
if c == 0 { |
|
return |
|
} |
|
|
|
matches := m.matches[groupnum] |
|
|
|
index := matches[(c-1)*2] |
|
last := index + matches[(c*2)-1] |
|
|
|
for ; index < last; index++ { |
|
buf.WriteRune(m.text[index]) |
|
} |
|
} |
|
|
|
func newGroup(name string, text []rune, caps []int, capcount int) Group { |
|
g := Group{} |
|
g.text = text |
|
if capcount > 0 { |
|
g.Index = caps[(capcount-1)*2] |
|
g.Length = caps[(capcount*2)-1] |
|
} |
|
g.Name = name |
|
g.Captures = make([]Capture, capcount) |
|
for i := 0; i < capcount; i++ { |
|
g.Captures[i] = Capture{ |
|
text: text, |
|
Index: caps[i*2], |
|
Length: caps[i*2+1], |
|
} |
|
} |
|
//log.Printf("newGroup! capcount %v, %+v", capcount, g) |
|
|
|
return g |
|
} |
|
|
|
func (m *Match) dump() string { |
|
buf := &bytes.Buffer{} |
|
buf.WriteRune('\n') |
|
if len(m.sparseCaps) > 0 { |
|
for k, v := range m.sparseCaps { |
|
fmt.Fprintf(buf, "Slot %v -> %v\n", k, v) |
|
} |
|
} |
|
|
|
for i, g := range m.Groups() { |
|
fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures)) |
|
|
|
for _, c := range g.Captures { |
|
fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String()) |
|
} |
|
} |
|
/* |
|
for i := 0; i < len(m.matchcount); i++ { |
|
fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i)) |
|
|
|
for j := 0; j < m.matchcount[i]; j++ { |
|
text := "" |
|
|
|
if m.matches[i][j*2] >= 0 { |
|
start := m.matches[i][j*2] |
|
text = m.text[start : start+m.matches[i][j*2+1]] |
|
} |
|
|
|
fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text) |
|
} |
|
} |
|
*/ |
|
return buf.String() |
|
}
|
|
|