mirror of
https://github.com/v2fly/v2ray-core.git
synced 2025-01-05 00:47:51 -05:00
feat: Implement Match and MatchAny for all MatcherGroup, IndexMatcher
[common/strmatcher] Implement Match and MatchAny for all MatcherGroup and IndexMatcher
This commit is contained in:
parent
2e0ea88041
commit
f494df2567
58
common/strmatcher/benchmark_indexmatcher_test.go
Normal file
58
common/strmatcher/benchmark_indexmatcher_test.go
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
||||||
|
benchmarkIndexMatcher(b, func() IndexMatcher {
|
||||||
|
return NewLinearIndexMatcher()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMphIndexMatcher(b *testing.B) {
|
||||||
|
benchmarkIndexMatcher(b, func() IndexMatcher {
|
||||||
|
return NewMphIndexMatcher()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkIndexMatcher(b *testing.B, ctor func() IndexMatcher) {
|
||||||
|
b.Run("Match", func(b *testing.B) {
|
||||||
|
b.Run("Domain------------", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true})
|
||||||
|
})
|
||||||
|
b.Run("Domain+Full-------", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true})
|
||||||
|
})
|
||||||
|
b.Run("Domain+Full+Substr", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true})
|
||||||
|
})
|
||||||
|
b.Run("All-Fail----------", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("Match/Dotless", func(b *testing.B) { // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
|
||||||
|
b.Run("All-Succ", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true, Regex: true})
|
||||||
|
})
|
||||||
|
b.Run("All-Fail", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false, Regex: false})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("MatchAny", func(b *testing.B) {
|
||||||
|
b.Run("First-Full--", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: true, Domain: true, Substr: true})
|
||||||
|
})
|
||||||
|
b.Run("First-Domain", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: true, Substr: true})
|
||||||
|
})
|
||||||
|
b.Run("First-Substr", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: true})
|
||||||
|
})
|
||||||
|
b.Run("All-Fail----", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: false})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
149
common/strmatcher/benchmark_matchers_test.go
Normal file
149
common/strmatcher/benchmark_matchers_test.go
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v5/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkFullMatcher(b *testing.B) {
|
||||||
|
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||||
|
return new(SimpleMatcherGroup)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("FullMatcherGroup--------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||||
|
return NewFullMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||||
|
return NewACAutomatonMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("MphMatcherGroup---------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||||
|
return NewMphMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkDomainMatcher(b *testing.B) {
|
||||||
|
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||||
|
return new(SimpleMatcherGroup)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("DomainMatcherGroup------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||||
|
return NewDomainMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||||
|
return NewACAutomatonMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("MphMatcherGroup---------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||||
|
return NewMphMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkSubstrMatcher(b *testing.B) {
|
||||||
|
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||||
|
return new(SimpleMatcherGroup)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("SubstrMatcherGroup------", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||||
|
return new(SubstrMatcherGroup)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||||
|
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||||
|
return NewACAutomatonMatcherGroup()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Utility functions for benchmark
|
||||||
|
|
||||||
|
func benchmarkMatcherType(b *testing.B, t Type, ctor func() MatcherGroup) {
|
||||||
|
b.Run("Match", func(b *testing.B) {
|
||||||
|
b.Run("Succ", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{t: true})
|
||||||
|
})
|
||||||
|
b.Run("Fail", func(b *testing.B) {
|
||||||
|
benchmarkMatch(b, ctor(), map[Type]bool{t: false})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
b.Run("MatchAny", func(b *testing.B) {
|
||||||
|
b.Run("Succ", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{t: true})
|
||||||
|
})
|
||||||
|
b.Run("Fail", func(b *testing.B) {
|
||||||
|
benchmarkMatchAny(b, ctor(), map[Type]bool{t: false})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkMatch(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
|
||||||
|
prepareMatchers(g, enabledTypes)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = g.Match("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkMatchAny(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
|
||||||
|
prepareMatchers(g, enabledTypes)
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = g.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func prepareMatchers(g MatcherGroup, enabledTypes map[Type]bool) {
|
||||||
|
for matcherType, hasMatch := range enabledTypes {
|
||||||
|
switch matcherType {
|
||||||
|
case Domain:
|
||||||
|
if hasMatch {
|
||||||
|
AddMatcherToGroup(g, DomainMatcher("v2fly.org"), 0)
|
||||||
|
}
|
||||||
|
for i := 1; i < 1024; i++ {
|
||||||
|
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
case Full:
|
||||||
|
if hasMatch {
|
||||||
|
AddMatcherToGroup(g, FullMatcher("0.v2fly.org"), 0)
|
||||||
|
}
|
||||||
|
for i := 1; i < 64; i++ {
|
||||||
|
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
case Substr:
|
||||||
|
if hasMatch {
|
||||||
|
AddMatcherToGroup(g, SubstrMatcher("v2fly.org"), 0)
|
||||||
|
}
|
||||||
|
for i := 1; i < 4; i++ {
|
||||||
|
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
case Regex:
|
||||||
|
matcher, err := Regex.New("^[^.]*$") // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
|
||||||
|
common.Must(err)
|
||||||
|
AddMatcherToGroup(g, matcher, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if g, ok := g.(buildable); ok {
|
||||||
|
common.Must(g.Build())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type buildable interface {
|
||||||
|
Build() error
|
||||||
|
}
|
@ -1,161 +0,0 @@
|
|||||||
package strmatcher_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"strconv"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/v2fly/v2ray-core/v5/common"
|
|
||||||
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Benchmark Domain Matcher Groups
|
|
||||||
|
|
||||||
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
|
|
||||||
g := new(SimpleMatcherGroup)
|
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkDomainMatcherGroup(b *testing.B) {
|
|
||||||
g := new(DomainMatcherGroup)
|
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
|
|
||||||
ac := NewACAutomatonMatcherGroup()
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
ac.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = ac.MatchAny("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
|
|
||||||
mph := NewMphMatcherGroup()
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
mph.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = mph.MatchAny("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Benchmark Full Matcher Groups
|
|
||||||
|
|
||||||
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
|
|
||||||
g := new(SimpleMatcherGroup)
|
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkFullMatcherGroup(b *testing.B) {
|
|
||||||
g := new(FullMatcherGroup)
|
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
|
|
||||||
ac := NewACAutomatonMatcherGroup()
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
ac.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = ac.MatchAny("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkMphMatcherGroupFull(b *testing.B) {
|
|
||||||
mph := NewMphMatcherGroup()
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
mph.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = mph.MatchAny("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Benchmark Substr Matcher Groups
|
|
||||||
|
|
||||||
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
|
|
||||||
g := new(SimpleMatcherGroup)
|
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
|
|
||||||
ac := NewACAutomatonMatcherGroup()
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
|
||||||
}
|
|
||||||
ac.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = ac.MatchAny("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Benchmark Index Matchers
|
|
||||||
|
|
||||||
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
|
||||||
g := new(LinearIndexMatcher)
|
|
||||||
for i := 1; i <= 1024; i++ {
|
|
||||||
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
|
|
||||||
common.Must(err)
|
|
||||||
g.Add(m)
|
|
||||||
}
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
_ = g.Match("0.v2fly.org")
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,13 +1,12 @@
|
|||||||
package strmatcher
|
package strmatcher
|
||||||
|
|
||||||
// LinearIndexMatcher is an implementation of IndexMatcher.
|
// LinearIndexMatcher is an implementation of IndexMatcher.
|
||||||
// Empty initialization works.
|
|
||||||
type LinearIndexMatcher struct {
|
type LinearIndexMatcher struct {
|
||||||
count uint32
|
count uint32
|
||||||
fullMatcher FullMatcherGroup
|
full *FullMatcherGroup
|
||||||
domainMatcher DomainMatcherGroup
|
domain *DomainMatcherGroup
|
||||||
substrMatcher SubstrMatcherGroup
|
substr *SubstrMatcherGroup
|
||||||
otherMatchers SimpleMatcherGroup
|
regex *SimpleMatcherGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewLinearIndexMatcher() *LinearIndexMatcher {
|
func NewLinearIndexMatcher() *LinearIndexMatcher {
|
||||||
@ -21,13 +20,25 @@ func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
|
|||||||
|
|
||||||
switch matcher := matcher.(type) {
|
switch matcher := matcher.(type) {
|
||||||
case FullMatcher:
|
case FullMatcher:
|
||||||
g.fullMatcher.AddFullMatcher(matcher, index)
|
if g.full == nil {
|
||||||
|
g.full = NewFullMatcherGroup()
|
||||||
|
}
|
||||||
|
g.full.AddFullMatcher(matcher, index)
|
||||||
case DomainMatcher:
|
case DomainMatcher:
|
||||||
g.domainMatcher.AddDomainMatcher(matcher, index)
|
if g.domain == nil {
|
||||||
|
g.domain = NewDomainMatcherGroup()
|
||||||
|
}
|
||||||
|
g.domain.AddDomainMatcher(matcher, index)
|
||||||
case SubstrMatcher:
|
case SubstrMatcher:
|
||||||
g.substrMatcher.AddSubstrMatcher(matcher, index)
|
if g.substr == nil {
|
||||||
|
g.substr = new(SubstrMatcherGroup)
|
||||||
|
}
|
||||||
|
g.substr.AddSubstrMatcher(matcher, index)
|
||||||
default:
|
default:
|
||||||
g.otherMatchers.AddMatcher(matcher, index)
|
if g.regex == nil {
|
||||||
|
g.regex = new(SimpleMatcherGroup)
|
||||||
|
}
|
||||||
|
g.regex.AddMatcher(matcher, index)
|
||||||
}
|
}
|
||||||
|
|
||||||
return index
|
return index
|
||||||
@ -40,17 +51,43 @@ func (*LinearIndexMatcher) Build() error {
|
|||||||
|
|
||||||
// Match implements IndexMatcher.Match.
|
// Match implements IndexMatcher.Match.
|
||||||
func (g *LinearIndexMatcher) Match(input string) []uint32 {
|
func (g *LinearIndexMatcher) Match(input string) []uint32 {
|
||||||
result := []uint32{}
|
// Allocate capacity to prevent matches escaping to heap
|
||||||
result = append(result, g.fullMatcher.Match(input)...)
|
result := make([][]uint32, 0, 5)
|
||||||
result = append(result, g.domainMatcher.Match(input)...)
|
if g.full != nil {
|
||||||
result = append(result, g.substrMatcher.Match(input)...)
|
if matches := g.full.Match(input); len(matches) > 0 {
|
||||||
result = append(result, g.otherMatchers.Match(input)...)
|
result = append(result, matches)
|
||||||
return result
|
}
|
||||||
|
}
|
||||||
|
if g.domain != nil {
|
||||||
|
if matches := g.domain.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if g.substr != nil {
|
||||||
|
if matches := g.substr.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if g.regex != nil {
|
||||||
|
if matches := g.regex.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return CompositeMatches(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements IndexMatcher.MatchAny.
|
// MatchAny implements IndexMatcher.MatchAny.
|
||||||
func (g *LinearIndexMatcher) MatchAny(input string) bool {
|
func (g *LinearIndexMatcher) MatchAny(input string) bool {
|
||||||
return len(g.Match(input)) > 0
|
if g.full != nil && g.full.MatchAny(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if g.domain != nil && g.domain.MatchAny(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if g.substr != nil && g.substr.MatchAny(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return g.regex != nil && g.regex.MatchAny(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size implements IndexMatcher.Size.
|
// Size implements IndexMatcher.Size.
|
||||||
|
@ -8,15 +8,11 @@ type MphIndexMatcher struct {
|
|||||||
count uint32
|
count uint32
|
||||||
mph *MphMatcherGroup
|
mph *MphMatcherGroup
|
||||||
ac *ACAutomatonMatcherGroup
|
ac *ACAutomatonMatcherGroup
|
||||||
regex SimpleMatcherGroup
|
regex *SimpleMatcherGroup
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewMphIndexMatcher() *MphIndexMatcher {
|
func NewMphIndexMatcher() *MphIndexMatcher {
|
||||||
return &MphIndexMatcher{
|
return new(MphIndexMatcher)
|
||||||
mph: nil,
|
|
||||||
ac: nil,
|
|
||||||
regex: SimpleMatcherGroup{},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add implements IndexMatcher.Add.
|
// Add implements IndexMatcher.Add.
|
||||||
@ -41,6 +37,9 @@ func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
|
|||||||
}
|
}
|
||||||
g.ac.AddSubstrMatcher(matcher, index)
|
g.ac.AddSubstrMatcher(matcher, index)
|
||||||
case *RegexMatcher:
|
case *RegexMatcher:
|
||||||
|
if g.regex == nil {
|
||||||
|
g.regex = &SimpleMatcherGroup{}
|
||||||
|
}
|
||||||
g.regex.AddMatcher(matcher, index)
|
g.regex.AddMatcher(matcher, index)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,8 +58,24 @@ func (g *MphIndexMatcher) Build() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Match implements IndexMatcher.Match.
|
// Match implements IndexMatcher.Match.
|
||||||
func (*MphIndexMatcher) Match(string) []uint32 {
|
func (g *MphIndexMatcher) Match(input string) []uint32 {
|
||||||
return nil
|
result := make([][]uint32, 0, 5)
|
||||||
|
if g.mph != nil {
|
||||||
|
if matches := g.mph.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if g.ac != nil {
|
||||||
|
if matches := g.ac.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if g.regex != nil {
|
||||||
|
if matches := g.regex.Match(input); len(matches) > 0 {
|
||||||
|
result = append(result, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return CompositeMatches(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements IndexMatcher.MatchAny.
|
// MatchAny implements IndexMatcher.MatchAny.
|
||||||
@ -71,7 +86,7 @@ func (g *MphIndexMatcher) MatchAny(input string) bool {
|
|||||||
if g.ac != nil && g.ac.MatchAny(input) {
|
if g.ac != nil && g.ac.MatchAny(input) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return g.regex.MatchAny(input)
|
return g.regex != nil && g.regex.MatchAny(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size implements IndexMatcher.Size.
|
// Size implements IndexMatcher.Size.
|
||||||
|
94
common/strmatcher/indexmatcher_mph_test.go
Normal file
94
common/strmatcher/indexmatcher_mph_test.go
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v5/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMphIndexMatcher(t *testing.T) {
|
||||||
|
rules := []struct {
|
||||||
|
Type Type
|
||||||
|
Domain string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Type: Regex,
|
||||||
|
Domain: "apis\\.us$",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Substr,
|
||||||
|
Domain: "apis",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Domain,
|
||||||
|
Domain: "googleapis.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Domain,
|
||||||
|
Domain: "com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Full,
|
||||||
|
Domain: "www.baidu.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Substr,
|
||||||
|
Domain: "apis",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Domain,
|
||||||
|
Domain: "googleapis.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Full,
|
||||||
|
Domain: "fonts.googleapis.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Full,
|
||||||
|
Domain: "www.baidu.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Type: Domain,
|
||||||
|
Domain: "example.com",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cases := []struct {
|
||||||
|
Input string
|
||||||
|
Output []uint32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Input: "www.baidu.com",
|
||||||
|
Output: []uint32{5, 9, 4},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Input: "fonts.googleapis.com",
|
||||||
|
Output: []uint32{8, 3, 7, 4, 2, 6},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Input: "example.googleapis.com",
|
||||||
|
Output: []uint32{3, 7, 4, 2, 6},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Input: "testapis.us",
|
||||||
|
Output: []uint32{2, 6, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Input: "example.com",
|
||||||
|
Output: []uint32{10, 4},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
matcherGroup := NewMphIndexMatcher()
|
||||||
|
for _, rule := range rules {
|
||||||
|
matcher, err := rule.Type.New(rule.Domain)
|
||||||
|
common.Must(err)
|
||||||
|
matcherGroup.Add(matcher)
|
||||||
|
}
|
||||||
|
matcherGroup.Build()
|
||||||
|
for _, test := range cases {
|
||||||
|
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -127,8 +127,8 @@ func (ac *ACAutomatonMatcherGroup) Build() error {
|
|||||||
|
|
||||||
// Match implements MatcherGroup.Match.
|
// Match implements MatcherGroup.Match.
|
||||||
func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
|
func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
|
||||||
var suffixMatches [][]uint32
|
suffixMatches := make([][]uint32, 0, 5)
|
||||||
var substrMatches [][]uint32
|
substrMatches := make([][]uint32, 0, 5)
|
||||||
fullMatch := true // fullMatch indicates no fail edge traversed so far.
|
fullMatch := true // fullMatch indicates no fail edge traversed so far.
|
||||||
node := &ac.nodes[0] // start from root node.
|
node := &ac.nodes[0] // start from root node.
|
||||||
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
||||||
@ -177,18 +177,10 @@ func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
|
|||||||
suffixMatches = append(suffixMatches, values[Full])
|
suffixMatches = append(suffixMatches, values[Full])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch matches := append(substrMatches, suffixMatches...); len(matches) { // nolint: gocritic
|
if len(substrMatches) == 0 {
|
||||||
case 0:
|
return CompositeMatchesReverse(suffixMatches)
|
||||||
return nil
|
|
||||||
case 1:
|
|
||||||
return matches[0]
|
|
||||||
default:
|
|
||||||
result := []uint32{}
|
|
||||||
for i := len(matches) - 1; i >= 0; i-- {
|
|
||||||
result = append(result, matches[i]...)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
return CompositeMatchesReverse(append(substrMatches, suffixMatches...))
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements MatcherGroup.MatchAny.
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
@ -1,101 +1,109 @@
|
|||||||
package strmatcher
|
package strmatcher
|
||||||
|
|
||||||
import "strings"
|
type trieNode struct {
|
||||||
|
|
||||||
func breakDomain(domain string) []string {
|
|
||||||
return strings.Split(domain, ".")
|
|
||||||
}
|
|
||||||
|
|
||||||
type node struct {
|
|
||||||
values []uint32
|
values []uint32
|
||||||
sub map[string]*node
|
children map[string]*trieNode
|
||||||
}
|
}
|
||||||
|
|
||||||
// DomainMatcherGroup is an implementation of MatcherGroup.
|
// DomainMatcherGroup is an implementation of MatcherGroup.
|
||||||
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
|
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
|
||||||
type DomainMatcherGroup struct {
|
type DomainMatcherGroup struct {
|
||||||
root *node
|
root *trieNode
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewDomainMatcherGroup() *DomainMatcherGroup {
|
||||||
|
return &DomainMatcherGroup{
|
||||||
|
root: new(trieNode),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||||
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
|
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
|
||||||
if g.root == nil {
|
node := g.root
|
||||||
g.root = new(node)
|
pattern := matcher.Pattern()
|
||||||
|
for i := len(pattern); i > 0; {
|
||||||
|
var part string
|
||||||
|
for j := i - 1; ; j-- {
|
||||||
|
if pattern[j] == '.' {
|
||||||
|
part = pattern[j+1 : i]
|
||||||
|
i = j
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
if j == 0 {
|
||||||
current := g.root
|
part = pattern[j:i]
|
||||||
parts := breakDomain(matcher.Pattern())
|
i = j
|
||||||
for i := len(parts) - 1; i >= 0; i-- {
|
break
|
||||||
part := parts[i]
|
|
||||||
if current.sub == nil {
|
|
||||||
current.sub = make(map[string]*node)
|
|
||||||
}
|
}
|
||||||
next := current.sub[part]
|
}
|
||||||
|
if node.children == nil {
|
||||||
|
node.children = make(map[string]*trieNode)
|
||||||
|
}
|
||||||
|
next := node.children[part]
|
||||||
if next == nil {
|
if next == nil {
|
||||||
next = new(node)
|
next = new(trieNode)
|
||||||
current.sub[part] = next
|
node.children[part] = next
|
||||||
}
|
}
|
||||||
current = next
|
node = next
|
||||||
}
|
}
|
||||||
|
|
||||||
current.values = append(current.values, value)
|
node.values = append(node.values, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match implements MatcherGroup.Match.
|
// Match implements MatcherGroup.Match.
|
||||||
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
func (g *DomainMatcherGroup) Match(input string) []uint32 {
|
||||||
if domain == "" {
|
matches := make([][]uint32, 0, 5)
|
||||||
return nil
|
node := g.root
|
||||||
}
|
for i := len(input); i > 0; {
|
||||||
|
for j := i - 1; ; j-- {
|
||||||
current := g.root
|
if input[j] == '.' { // Domain label found
|
||||||
if current == nil {
|
node = node.children[input[j+1:i]]
|
||||||
return nil
|
i = j
|
||||||
}
|
|
||||||
|
|
||||||
nextPart := func(idx int) int {
|
|
||||||
for i := idx - 1; i >= 0; i-- {
|
|
||||||
if domain[i] == '.' {
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
matches := [][]uint32{}
|
|
||||||
idx := len(domain)
|
|
||||||
for {
|
|
||||||
if idx == -1 || current.sub == nil {
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
if j == 0 { // The last part of domain label
|
||||||
nidx := nextPart(idx)
|
node = node.children[input[j:i]]
|
||||||
part := domain[nidx+1 : idx]
|
i = j
|
||||||
next := current.sub[part]
|
|
||||||
if next == nil {
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
current = next
|
}
|
||||||
idx = nidx
|
if node == nil { // No more match if no trie edge transition
|
||||||
if len(current.values) > 0 {
|
break
|
||||||
matches = append(matches, current.values)
|
}
|
||||||
|
if len(node.values) > 0 { // Found matched matchers
|
||||||
|
matches = append(matches, node.values)
|
||||||
|
}
|
||||||
|
if node.children == nil { // No more match if leaf node reached
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch len(matches) {
|
return CompositeMatchesReverse(matches)
|
||||||
case 0:
|
|
||||||
return nil
|
|
||||||
case 1:
|
|
||||||
return matches[0]
|
|
||||||
default:
|
|
||||||
result := []uint32{}
|
|
||||||
for idx := range matches {
|
|
||||||
// Insert reversely, the subdomain that matches further ranks higher
|
|
||||||
result = append(result, matches[len(matches)-1-idx]...)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements MatcherGroup.MatchAny.
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
func (g *DomainMatcherGroup) MatchAny(domain string) bool {
|
func (g *DomainMatcherGroup) MatchAny(input string) bool {
|
||||||
return len(g.Match(domain)) > 0
|
node := g.root
|
||||||
|
for i := len(input); i > 0; {
|
||||||
|
for j := i - 1; ; j-- {
|
||||||
|
if input[j] == '.' {
|
||||||
|
node = node.children[input[j+1:i]]
|
||||||
|
i = j
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if j == 0 {
|
||||||
|
node = node.children[input[j:i]]
|
||||||
|
i = j
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if node == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if len(node.values) > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if node.children == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,7 @@ func TestDomainMatcherGroup(t *testing.T) {
|
|||||||
Result: []uint32{4, 6},
|
Result: []uint32{4, 6},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
g := new(DomainMatcherGroup)
|
g := NewDomainMatcherGroup()
|
||||||
for _, pattern := range patterns {
|
for _, pattern := range patterns {
|
||||||
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
|
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
|
||||||
}
|
}
|
||||||
@ -95,7 +95,7 @@ func TestDomainMatcherGroup(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestEmptyDomainMatcherGroup(t *testing.T) {
|
func TestEmptyDomainMatcherGroup(t *testing.T) {
|
||||||
g := new(DomainMatcherGroup)
|
g := NewDomainMatcherGroup()
|
||||||
r := g.Match("v2fly.org")
|
r := g.Match("v2fly.org")
|
||||||
if len(r) != 0 {
|
if len(r) != 0 {
|
||||||
t.Error("Expect [], but ", r)
|
t.Error("Expect [], but ", r)
|
||||||
|
@ -6,25 +6,25 @@ type FullMatcherGroup struct {
|
|||||||
matchers map[string][]uint32
|
matchers map[string][]uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
func NewFullMatcherGroup() *FullMatcherGroup {
|
||||||
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
return &FullMatcherGroup{
|
||||||
if g.matchers == nil {
|
matchers: make(map[string][]uint32),
|
||||||
g.matchers = make(map[string][]uint32)
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||||
|
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
||||||
domain := matcher.Pattern()
|
domain := matcher.Pattern()
|
||||||
g.matchers[domain] = append(g.matchers[domain], value)
|
g.matchers[domain] = append(g.matchers[domain], value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match implements MatcherGroup.Match.
|
// Match implements MatcherGroup.Match.
|
||||||
func (g *FullMatcherGroup) Match(input string) []uint32 {
|
func (g *FullMatcherGroup) Match(input string) []uint32 {
|
||||||
if g.matchers == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return g.matchers[input]
|
return g.matchers[input]
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements MatcherGroup.MatchAny.
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
func (g *FullMatcherGroup) MatchAny(input string) bool {
|
func (g *FullMatcherGroup) MatchAny(input string) bool {
|
||||||
return len(g.Match(input)) > 0
|
_, found := g.matchers[input]
|
||||||
|
return found
|
||||||
}
|
}
|
||||||
|
@ -50,7 +50,7 @@ func TestFullMatcherGroup(t *testing.T) {
|
|||||||
Result: []uint32{4, 6},
|
Result: []uint32{4, 6},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
g := new(FullMatcherGroup)
|
g := NewFullMatcherGroup()
|
||||||
for _, pattern := range patterns {
|
for _, pattern := range patterns {
|
||||||
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
|
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
|
||||||
}
|
}
|
||||||
@ -63,7 +63,7 @@ func TestFullMatcherGroup(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestEmptyFullMatcherGroup(t *testing.T) {
|
func TestEmptyFullMatcherGroup(t *testing.T) {
|
||||||
g := new(FullMatcherGroup)
|
g := NewFullMatcherGroup()
|
||||||
r := g.Match("v2fly.org")
|
r := g.Match("v2fly.org")
|
||||||
if len(r) != 0 {
|
if len(r) != 0 {
|
||||||
t.Error("Expect [], but ", r)
|
t.Error("Expect [], but ", r)
|
||||||
|
@ -152,7 +152,7 @@ func (g *MphMatcherGroup) Lookup(rollingHash uint32, input string) uint32 {
|
|||||||
|
|
||||||
// Match implements MatcherGroup.Match.
|
// Match implements MatcherGroup.Match.
|
||||||
func (g *MphMatcherGroup) Match(input string) []uint32 {
|
func (g *MphMatcherGroup) Match(input string) []uint32 {
|
||||||
matches := [][]uint32{}
|
matches := make([][]uint32, 0, 5)
|
||||||
hash := uint32(0)
|
hash := uint32(0)
|
||||||
for i := len(input) - 1; i >= 0; i-- {
|
for i := len(input) - 1; i >= 0; i-- {
|
||||||
hash = hash*PrimeRK + uint32(input[i])
|
hash = hash*PrimeRK + uint32(input[i])
|
||||||
@ -165,18 +165,7 @@ func (g *MphMatcherGroup) Match(input string) []uint32 {
|
|||||||
if mphIdx := g.Lookup(hash, input); mphIdx != 0 {
|
if mphIdx := g.Lookup(hash, input); mphIdx != 0 {
|
||||||
matches = append(matches, g.values[mphIdx])
|
matches = append(matches, g.values[mphIdx])
|
||||||
}
|
}
|
||||||
switch len(matches) {
|
return CompositeMatchesReverse(matches)
|
||||||
case 0:
|
|
||||||
return nil
|
|
||||||
case 1:
|
|
||||||
return matches[0]
|
|
||||||
default:
|
|
||||||
result := []uint32{}
|
|
||||||
for i := len(matches) - 1; i >= 0; i-- {
|
|
||||||
result = append(result, matches[i]...)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAny implements MatcherGroup.MatchAny.
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
@ -32,5 +32,10 @@ func (g *SimpleMatcherGroup) Match(input string) []uint32 {
|
|||||||
|
|
||||||
// MatchAny implements MatcherGroup.MatchAny.
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
|
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
|
||||||
return len(g.Match(input)) > 0
|
for _, e := range g.matchers {
|
||||||
|
if e.matcher.Match(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
@ -20,16 +20,30 @@ func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint3
|
|||||||
|
|
||||||
// Match implements MatcherGroup.Match.
|
// Match implements MatcherGroup.Match.
|
||||||
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
|
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
|
||||||
result := []uint32{}
|
var result []uint32
|
||||||
for i, pattern := range g.patterns {
|
for i, pattern := range g.patterns {
|
||||||
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
|
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
|
||||||
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
|
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// sort.Slice will trigger allocation no matter what the input is. See https://github.com/golang/go/issues/17332
|
||||||
|
// We special-case short results to avoid memory allocation as much as possible.
|
||||||
|
switch len(result) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case 1:
|
||||||
|
// No need to sort
|
||||||
|
case 2:
|
||||||
|
// Do a simple swap if unsorted
|
||||||
|
if result[0] > result[1] {
|
||||||
|
result[0], result[1] = result[1], result[0]
|
||||||
|
}
|
||||||
|
default:
|
||||||
// Sort the match results in dictionary order, so that:
|
// Sort the match results in dictionary order, so that:
|
||||||
// 1. Pattern matched at smaller position (meaning matched further) takes precedence.
|
// 1. Pattern matched at smaller position (meaning matched further) takes precedence.
|
||||||
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
|
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
|
||||||
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
||||||
|
}
|
||||||
for i, entry := range result {
|
for i, entry := range result {
|
||||||
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
|
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FullMatcher is an implementation of Matcher.
|
// FullMatcher is an implementation of Matcher.
|
||||||
@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) {
|
|||||||
case Substr:
|
case Substr:
|
||||||
return SubstrMatcher(pattern), nil
|
return SubstrMatcher(pattern), nil
|
||||||
case Domain:
|
case Domain:
|
||||||
|
pattern, err := ToDomain(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
return DomainMatcher(pattern), nil
|
return DomainMatcher(pattern), nil
|
||||||
case Regex: // 1. regex matching is case-sensitive
|
case Regex: // 1. regex matching is case-sensitive
|
||||||
regex, err := regexp.Compile(pattern)
|
regex, err := regexp.Compile(pattern)
|
||||||
@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) {
|
|||||||
}
|
}
|
||||||
return &RegexMatcher{pattern: regex}, nil
|
return &RegexMatcher{pattern: regex}, nil
|
||||||
default:
|
default:
|
||||||
panic("Unknown type")
|
return nil, errors.New("unknown matcher type")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewDomainPattern creates a new Matcher based on the given domain pattern.
|
||||||
|
// It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
|
||||||
|
func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
|
||||||
|
switch t {
|
||||||
|
case Full:
|
||||||
|
pattern, err := ToDomain(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return FullMatcher(pattern), nil
|
||||||
|
case Substr:
|
||||||
|
pattern, err := ToDomain(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return SubstrMatcher(pattern), nil
|
||||||
|
case Domain:
|
||||||
|
pattern, err := ToDomain(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return DomainMatcher(pattern), nil
|
||||||
|
case Regex: // Regex's charset not in LDH subset
|
||||||
|
regex, err := regexp.Compile(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &RegexMatcher{pattern: regex}, nil
|
||||||
|
default:
|
||||||
|
return nil, errors.New("unknown matcher type")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToDomain converts input pattern to a domain string, and returns an error if such a conversion cannot be made.
|
||||||
|
// 1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
|
||||||
|
// * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers)
|
||||||
|
// * Digits 0 to 9
|
||||||
|
// * Hyphens(-) and Periods(.)
|
||||||
|
// 2. Non-ASCII characters not supported for now.
|
||||||
|
// * May support Internationalized domain name to Punycode if needed in the future.
|
||||||
|
func ToDomain(pattern string) (string, error) {
|
||||||
|
builder := strings.Builder{}
|
||||||
|
builder.Grow(len(pattern))
|
||||||
|
for i := 0; i < len(pattern); i++ {
|
||||||
|
c := pattern[i]
|
||||||
|
if c >= utf8.RuneSelf {
|
||||||
|
return "", errors.New("non-ASCII characters not supported for now")
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case 'A' <= c && c <= 'Z':
|
||||||
|
c += 'a' - 'A'
|
||||||
|
case 'a' <= c && c <= 'z':
|
||||||
|
case '0' <= c && c <= '9':
|
||||||
|
case c == '-':
|
||||||
|
case c == '.':
|
||||||
|
default:
|
||||||
|
return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
|
||||||
|
}
|
||||||
|
builder.WriteByte(c)
|
||||||
|
}
|
||||||
|
return builder.String(), nil
|
||||||
|
}
|
||||||
|
|
||||||
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
||||||
type MatcherGroupForAll interface {
|
type MatcherGroupForAll interface {
|
||||||
AddMatcher(matcher Matcher, value uint32)
|
AddMatcher(matcher Matcher, value uint32)
|
||||||
@ -137,6 +205,10 @@ type MatcherGroupForRegex interface {
|
|||||||
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
||||||
// This function is provided to help writing code to test a MatcherGroup.
|
// This function is provided to help writing code to test a MatcherGroup.
|
||||||
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
||||||
|
if g, ok := g.(IndexMatcher); ok {
|
||||||
|
g.Add(matcher)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
if g, ok := g.(MatcherGroupForAll); ok {
|
if g, ok := g.(MatcherGroupForAll); ok {
|
||||||
g.AddMatcher(matcher, value)
|
g.AddMatcher(matcher, value)
|
||||||
return nil
|
return nil
|
||||||
@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
|||||||
}
|
}
|
||||||
return errors.New("cannot add matcher to matcher group")
|
return errors.New("cannot add matcher to matcher group")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
|
||||||
|
// It is designed to avoid new memory allocation as much as possible.
|
||||||
|
func CompositeMatches(matches [][]uint32) []uint32 {
|
||||||
|
switch len(matches) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case 1:
|
||||||
|
return matches[0]
|
||||||
|
default:
|
||||||
|
result := make([]uint32, 0, 5)
|
||||||
|
for i := 0; i < len(matches); i++ {
|
||||||
|
result = append(result, matches[i]...)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice.
|
||||||
|
// It is designed that:
|
||||||
|
// 1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher.
|
||||||
|
// 2. Indices in the same matcher keep their original order.
|
||||||
|
// 3. Avoid new memory allocation as much as possible.
|
||||||
|
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
|
||||||
|
switch len(matches) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case 1:
|
||||||
|
return matches[0]
|
||||||
|
default:
|
||||||
|
result := make([]uint32, 0, 5)
|
||||||
|
for i := len(matches) - 1; i >= 0; i-- {
|
||||||
|
result = append(result, matches[i]...)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user