mirror of
https://github.com/v2fly/v2ray-core.git
synced 2024-12-30 05:56:54 -05:00
A memory-efficient and fast hybrid matcher (#639)
* a faster DomainMatcher implementation * rename benchmark name * fix linting errors * add hybrid matcher * add rabin-karp algorithm * rename test & fix linting errors * add more comment * format code * revert `MatcherGroup` match func * fix linting errors
This commit is contained in:
parent
49cf614346
commit
a31a8e6f89
@ -399,7 +399,7 @@ func TestChinaSites(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACDomainMatcher(b *testing.B) {
|
||||
func BenchmarkHybridDomainMatcher(b *testing.B) {
|
||||
domains, err := loadGeoSite("CN")
|
||||
common.Must(err)
|
||||
|
||||
|
@ -4,6 +4,9 @@ import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// PrimeRK is the prime base used in Rabin-Karp algorithm.
|
||||
const PrimeRK = 16777619
|
||||
|
||||
// Matcher is the interface to determine a string matches a pattern.
|
||||
type Matcher interface {
|
||||
// Match returns true if the given string matches a predefined pattern.
|
||||
@ -61,20 +64,42 @@ type matcherEntry struct {
|
||||
type ACAutomatonMatcherGroup struct {
|
||||
count uint32
|
||||
ac *ACAutomaton
|
||||
nonSubstrMap map[uint32]string
|
||||
otherMatchers []matcherEntry
|
||||
}
|
||||
|
||||
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
|
||||
var g = new(ACAutomatonMatcherGroup)
|
||||
g.count = 1
|
||||
g.ac = NewACAutomaton()
|
||||
g.nonSubstrMap = map[uint32]string{}
|
||||
return g
|
||||
}
|
||||
|
||||
// Add `full` or `domain` pattern to hashmap
|
||||
func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
|
||||
h := uint32(0)
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
h = h*PrimeRK + uint32(pattern[i])
|
||||
}
|
||||
switch t {
|
||||
case Full:
|
||||
g.nonSubstrMap[h] = pattern
|
||||
case Domain:
|
||||
g.nonSubstrMap[h] = pattern
|
||||
g.nonSubstrMap[h*PrimeRK+uint32('.')] = "." + pattern
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
||||
switch t {
|
||||
case Full, Substr, Domain:
|
||||
case Substr:
|
||||
if g.ac == nil {
|
||||
g.ac = NewACAutomaton()
|
||||
}
|
||||
g.ac.Add(pattern, t)
|
||||
case Full, Domain:
|
||||
g.AddFullOrDomainPattern(pattern, t)
|
||||
case Regex:
|
||||
g.count++
|
||||
r, err := regexp.Compile(pattern)
|
||||
@ -92,18 +117,36 @@ func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, er
|
||||
}
|
||||
|
||||
func (g *ACAutomatonMatcherGroup) Build() {
|
||||
g.ac.Build()
|
||||
if g.ac != nil {
|
||||
g.ac.Build()
|
||||
}
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
|
||||
result := []uint32{}
|
||||
if g.ac.Match(pattern) {
|
||||
hash := uint32(0)
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(pattern[i])
|
||||
if pattern[i] == '.' {
|
||||
if v, ok := g.nonSubstrMap[hash]; ok && v == pattern[i:] {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
}
|
||||
}
|
||||
if v, ok := g.nonSubstrMap[hash]; ok && v == pattern {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
if g.ac != nil && g.ac.Match(pattern) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
for _, e := range g.otherMatchers {
|
||||
if e.m.Match(pattern) {
|
||||
result = append(result, e.id)
|
||||
return result
|
||||
}
|
||||
}
|
||||
return result
|
||||
|
Loading…
Reference in New Issue
Block a user