package strmatcher import ( "container/list" ) const validCharCount = 53 type MatchType struct { matchType Type exist bool } const ( TrieEdge bool = true FailEdge bool = false ) type Edge struct { edgeType bool nextNode int } // ACAutoMationMatcherGroup is an implementation of MatcherGroup. // It uses an AC Automata to provide support for Full, Domain and Substr matcher. Trie node is char based. type ACAutomatonMatcherGroup struct { trie [][validCharCount]Edge fail []int exists []MatchType count int } func newNode() [validCharCount]Edge { var s [validCharCount]Edge for i := range s { s[i] = Edge{ edgeType: FailEdge, nextNode: 0, } } return s } var char2Index = []int{ 'A': 0, 'a': 0, 'B': 1, 'b': 1, 'C': 2, 'c': 2, 'D': 3, 'd': 3, 'E': 4, 'e': 4, 'F': 5, 'f': 5, 'G': 6, 'g': 6, 'H': 7, 'h': 7, 'I': 8, 'i': 8, 'J': 9, 'j': 9, 'K': 10, 'k': 10, 'L': 11, 'l': 11, 'M': 12, 'm': 12, 'N': 13, 'n': 13, 'O': 14, 'o': 14, 'P': 15, 'p': 15, 'Q': 16, 'q': 16, 'R': 17, 'r': 17, 'S': 18, 's': 18, 'T': 19, 't': 19, 'U': 20, 'u': 20, 'V': 21, 'v': 21, 'W': 22, 'w': 22, 'X': 23, 'x': 23, 'Y': 24, 'y': 24, 'Z': 25, 'z': 25, '!': 26, '$': 27, '&': 28, '\'': 29, '(': 30, ')': 31, '*': 32, '+': 33, ',': 34, ';': 35, '=': 36, ':': 37, '%': 38, '-': 39, '.': 40, '_': 41, '~': 42, '0': 43, '1': 44, '2': 45, '3': 46, '4': 47, '5': 48, '6': 49, '7': 50, '8': 51, '9': 52, } func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup { ac := new(ACAutomatonMatcherGroup) ac.trie = append(ac.trie, newNode()) ac.fail = append(ac.fail, 0) ac.exists = append(ac.exists, MatchType{ matchType: Full, exist: false, }) return ac } // AddFullMatcher implements MatcherGroupForFull.AddFullMatcher. func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) { ac.addPattern(0, matcher.Pattern(), matcher.Type()) } // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher. func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) { node := ac.addPattern(0, matcher.Pattern(), Full) ac.addPattern(node, ".", Domain) } // AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher. func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) { ac.addPattern(0, matcher.Pattern(), matcher.Type()) } func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int { for i := len(pattern) - 1; i >= 0; i-- { idx := char2Index[pattern[i]] if ac.trie[node][idx].nextNode == 0 { ac.count++ if len(ac.trie) < ac.count+1 { ac.trie = append(ac.trie, newNode()) ac.fail = append(ac.fail, 0) ac.exists = append(ac.exists, MatchType{ matchType: Full, exist: false, }) } ac.trie[node][idx] = Edge{ edgeType: TrieEdge, nextNode: ac.count, } } node = ac.trie[node][idx].nextNode } ac.exists[node] = MatchType{ matchType: matcherType, exist: true, } return node } func (ac *ACAutomatonMatcherGroup) Build() { queue := list.New() for i := 0; i < validCharCount; i++ { if ac.trie[0][i].nextNode != 0 { queue.PushBack(ac.trie[0][i]) } } for { front := queue.Front() if front == nil { break } else { node := front.Value.(Edge).nextNode queue.Remove(front) for i := 0; i < validCharCount; i++ { if ac.trie[node][i].nextNode != 0 { ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode queue.PushBack(ac.trie[node][i]) } else { ac.trie[node][i] = Edge{ edgeType: FailEdge, nextNode: ac.trie[ac.fail[node]][i].nextNode, } } } } } } // Match implements MatcherGroup.Match. func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 { return nil } // MatchAny implements MatcherGroup.MatchAny. func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool { node := 0 fullMatch := true // 1. the match string is all through trie edge. FULL MATCH or DOMAIN // 2. the match string is through a fail edge. NOT FULL MATCH // 2.1 Through a fail edge, but there exists a valid node. SUBSTR for i := len(s) - 1; i >= 0; i-- { idx := char2Index[s[i]] fullMatch = fullMatch && ac.trie[node][idx].edgeType node = ac.trie[node][idx].nextNode switch ac.exists[node].matchType { case Substr: return true case Domain: if fullMatch { return true } default: break } } return fullMatch && ac.exists[node].exist }