diff --git a/app/router/condition_test.go b/app/router/condition_test.go index 20fb0c6b9..ce3c581b5 100644 --- a/app/router/condition_test.go +++ b/app/router/condition_test.go @@ -399,7 +399,7 @@ func TestChinaSites(t *testing.T) { } } -func BenchmarkACDomainMatcher(b *testing.B) { +func BenchmarkHybridDomainMatcher(b *testing.B) { domains, err := loadGeoSite("CN") common.Must(err) diff --git a/common/strmatcher/strmatcher.go b/common/strmatcher/strmatcher.go index 5f1dcd953..97d404f77 100644 --- a/common/strmatcher/strmatcher.go +++ b/common/strmatcher/strmatcher.go @@ -4,6 +4,9 @@ import ( "regexp" ) +// PrimeRK is the prime base used in Rabin-Karp algorithm. +const PrimeRK = 16777619 + // Matcher is the interface to determine a string matches a pattern. type Matcher interface { // Match returns true if the given string matches a predefined pattern. @@ -61,20 +64,42 @@ type matcherEntry struct { type ACAutomatonMatcherGroup struct { count uint32 ac *ACAutomaton + nonSubstrMap map[uint32]string otherMatchers []matcherEntry } func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup { var g = new(ACAutomatonMatcherGroup) g.count = 1 - g.ac = NewACAutomaton() + g.nonSubstrMap = map[uint32]string{} return g } +// Add `full` or `domain` pattern to hashmap +func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) { + h := uint32(0) + for i := len(pattern) - 1; i >= 0; i-- { + h = h*PrimeRK + uint32(pattern[i]) + } + switch t { + case Full: + g.nonSubstrMap[h] = pattern + case Domain: + g.nonSubstrMap[h] = pattern + g.nonSubstrMap[h*PrimeRK+uint32('.')] = "." + pattern + default: + } +} + func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) { switch t { - case Full, Substr, Domain: + case Substr: + if g.ac == nil { + g.ac = NewACAutomaton() + } g.ac.Add(pattern, t) + case Full, Domain: + g.AddFullOrDomainPattern(pattern, t) case Regex: g.count++ r, err := regexp.Compile(pattern) @@ -92,18 +117,36 @@ func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, er } func (g *ACAutomatonMatcherGroup) Build() { - g.ac.Build() + if g.ac != nil { + g.ac.Build() + } } // Match implements IndexMatcher.Match. func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 { result := []uint32{} - if g.ac.Match(pattern) { + hash := uint32(0) + for i := len(pattern) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(pattern[i]) + if pattern[i] == '.' { + if v, ok := g.nonSubstrMap[hash]; ok && v == pattern[i:] { + result = append(result, 1) + return result + } + } + } + if v, ok := g.nonSubstrMap[hash]; ok && v == pattern { result = append(result, 1) + return result + } + if g.ac != nil && g.ac.Match(pattern) { + result = append(result, 1) + return result } for _, e := range g.otherMatchers { if e.m.Match(pattern) { result = append(result, e.id) + return result } } return result