1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2024-06-17 04:55:23 +00:00

A memory-efficient and fast hybrid matcher (#639)

* a faster DomainMatcher implementation

* rename benchmark name

* fix linting errors

* add hybrid matcher

* add rabin-karp algorithm

* rename test & fix linting errors

* add more comment

* format code

* revert `MatcherGroup` match func

* fix linting errors
This commit is contained in:
DarthVader 2021-03-04 05:39:51 +08:00 committed by GitHub
parent 49cf614346
commit a31a8e6f89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 5 deletions

View File

@ -399,7 +399,7 @@ func TestChinaSites(t *testing.T) {
}
}
func BenchmarkACDomainMatcher(b *testing.B) {
func BenchmarkHybridDomainMatcher(b *testing.B) {
domains, err := loadGeoSite("CN")
common.Must(err)

View File

@ -4,6 +4,9 @@ import (
"regexp"
)
// PrimeRK is the prime base used in Rabin-Karp algorithm.
const PrimeRK = 16777619
// Matcher is the interface to determine a string matches a pattern.
type Matcher interface {
// Match returns true if the given string matches a predefined pattern.
@ -61,20 +64,42 @@ type matcherEntry struct {
type ACAutomatonMatcherGroup struct {
count uint32
ac *ACAutomaton
nonSubstrMap map[uint32]string
otherMatchers []matcherEntry
}
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
var g = new(ACAutomatonMatcherGroup)
g.count = 1
g.ac = NewACAutomaton()
g.nonSubstrMap = map[uint32]string{}
return g
}
// Add `full` or `domain` pattern to hashmap
func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
h := uint32(0)
for i := len(pattern) - 1; i >= 0; i-- {
h = h*PrimeRK + uint32(pattern[i])
}
switch t {
case Full:
g.nonSubstrMap[h] = pattern
case Domain:
g.nonSubstrMap[h] = pattern
g.nonSubstrMap[h*PrimeRK+uint32('.')] = "." + pattern
default:
}
}
func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
switch t {
case Full, Substr, Domain:
case Substr:
if g.ac == nil {
g.ac = NewACAutomaton()
}
g.ac.Add(pattern, t)
case Full, Domain:
g.AddFullOrDomainPattern(pattern, t)
case Regex:
g.count++
r, err := regexp.Compile(pattern)
@ -92,18 +117,36 @@ func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, er
}
func (g *ACAutomatonMatcherGroup) Build() {
g.ac.Build()
if g.ac != nil {
g.ac.Build()
}
}
// Match implements IndexMatcher.Match.
func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
result := []uint32{}
if g.ac.Match(pattern) {
hash := uint32(0)
for i := len(pattern) - 1; i >= 0; i-- {
hash = hash*PrimeRK + uint32(pattern[i])
if pattern[i] == '.' {
if v, ok := g.nonSubstrMap[hash]; ok && v == pattern[i:] {
result = append(result, 1)
return result
}
}
}
if v, ok := g.nonSubstrMap[hash]; ok && v == pattern {
result = append(result, 1)
return result
}
if g.ac != nil && g.ac.Match(pattern) {
result = append(result, 1)
return result
}
for _, e := range g.otherMatchers {
if e.m.Match(pattern) {
result = append(result, e.id)
return result
}
}
return result