mirror of
https://github.com/v2fly/v2ray-core.git
synced 2025-01-03 07:56:42 -05:00
feat: Implement Match and MatchAny for all MatcherGroup, IndexMatcher
[common/strmatcher] Implement Match and MatchAny for all MatcherGroup and IndexMatcher
This commit is contained in:
parent
2e0ea88041
commit
f494df2567
58
common/strmatcher/benchmark_indexmatcher_test.go
Normal file
58
common/strmatcher/benchmark_indexmatcher_test.go
Normal file
@ -0,0 +1,58 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||
)
|
||||
|
||||
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
||||
benchmarkIndexMatcher(b, func() IndexMatcher {
|
||||
return NewLinearIndexMatcher()
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkMphIndexMatcher(b *testing.B) {
|
||||
benchmarkIndexMatcher(b, func() IndexMatcher {
|
||||
return NewMphIndexMatcher()
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkIndexMatcher(b *testing.B, ctor func() IndexMatcher) {
|
||||
b.Run("Match", func(b *testing.B) {
|
||||
b.Run("Domain------------", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true})
|
||||
})
|
||||
b.Run("Domain+Full-------", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true})
|
||||
})
|
||||
b.Run("Domain+Full+Substr", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true})
|
||||
})
|
||||
b.Run("All-Fail----------", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false})
|
||||
})
|
||||
})
|
||||
b.Run("Match/Dotless", func(b *testing.B) { // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
|
||||
b.Run("All-Succ", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true, Regex: true})
|
||||
})
|
||||
b.Run("All-Fail", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false, Regex: false})
|
||||
})
|
||||
})
|
||||
b.Run("MatchAny", func(b *testing.B) {
|
||||
b.Run("First-Full--", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: true, Domain: true, Substr: true})
|
||||
})
|
||||
b.Run("First-Domain", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: true, Substr: true})
|
||||
})
|
||||
b.Run("First-Substr", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: true})
|
||||
})
|
||||
b.Run("All-Fail----", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: false})
|
||||
})
|
||||
})
|
||||
}
|
149
common/strmatcher/benchmark_matchers_test.go
Normal file
149
common/strmatcher/benchmark_matchers_test.go
Normal file
@ -0,0 +1,149 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v5/common"
|
||||
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||
)
|
||||
|
||||
func BenchmarkFullMatcher(b *testing.B) {
|
||||
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||
return new(SimpleMatcherGroup)
|
||||
})
|
||||
})
|
||||
b.Run("FullMatcherGroup--------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||
return NewFullMatcherGroup()
|
||||
})
|
||||
})
|
||||
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||
return NewACAutomatonMatcherGroup()
|
||||
})
|
||||
})
|
||||
b.Run("MphMatcherGroup---------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Full, func() MatcherGroup {
|
||||
return NewMphMatcherGroup()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDomainMatcher(b *testing.B) {
|
||||
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||
return new(SimpleMatcherGroup)
|
||||
})
|
||||
})
|
||||
b.Run("DomainMatcherGroup------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||
return NewDomainMatcherGroup()
|
||||
})
|
||||
})
|
||||
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||
return NewACAutomatonMatcherGroup()
|
||||
})
|
||||
})
|
||||
b.Run("MphMatcherGroup---------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Domain, func() MatcherGroup {
|
||||
return NewMphMatcherGroup()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkSubstrMatcher(b *testing.B) {
|
||||
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||
return new(SimpleMatcherGroup)
|
||||
})
|
||||
})
|
||||
b.Run("SubstrMatcherGroup------", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||
return new(SubstrMatcherGroup)
|
||||
})
|
||||
})
|
||||
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
|
||||
benchmarkMatcherType(b, Substr, func() MatcherGroup {
|
||||
return NewACAutomatonMatcherGroup()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// Utility functions for benchmark
|
||||
|
||||
func benchmarkMatcherType(b *testing.B, t Type, ctor func() MatcherGroup) {
|
||||
b.Run("Match", func(b *testing.B) {
|
||||
b.Run("Succ", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{t: true})
|
||||
})
|
||||
b.Run("Fail", func(b *testing.B) {
|
||||
benchmarkMatch(b, ctor(), map[Type]bool{t: false})
|
||||
})
|
||||
})
|
||||
b.Run("MatchAny", func(b *testing.B) {
|
||||
b.Run("Succ", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{t: true})
|
||||
})
|
||||
b.Run("Fail", func(b *testing.B) {
|
||||
benchmarkMatchAny(b, ctor(), map[Type]bool{t: false})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkMatch(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
|
||||
prepareMatchers(g, enabledTypes)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkMatchAny(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
|
||||
prepareMatchers(g, enabledTypes)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func prepareMatchers(g MatcherGroup, enabledTypes map[Type]bool) {
|
||||
for matcherType, hasMatch := range enabledTypes {
|
||||
switch matcherType {
|
||||
case Domain:
|
||||
if hasMatch {
|
||||
AddMatcherToGroup(g, DomainMatcher("v2fly.org"), 0)
|
||||
}
|
||||
for i := 1; i < 1024; i++ {
|
||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
case Full:
|
||||
if hasMatch {
|
||||
AddMatcherToGroup(g, FullMatcher("0.v2fly.org"), 0)
|
||||
}
|
||||
for i := 1; i < 64; i++ {
|
||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
case Substr:
|
||||
if hasMatch {
|
||||
AddMatcherToGroup(g, SubstrMatcher("v2fly.org"), 0)
|
||||
}
|
||||
for i := 1; i < 4; i++ {
|
||||
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
case Regex:
|
||||
matcher, err := Regex.New("^[^.]*$") // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
|
||||
common.Must(err)
|
||||
AddMatcherToGroup(g, matcher, 0)
|
||||
}
|
||||
}
|
||||
if g, ok := g.(buildable); ok {
|
||||
common.Must(g.Build())
|
||||
}
|
||||
}
|
||||
|
||||
type buildable interface {
|
||||
Build() error
|
||||
}
|
@ -1,161 +0,0 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v5/common"
|
||||
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||
)
|
||||
|
||||
// Benchmark Domain Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkDomainMatcherGroup(b *testing.B) {
|
||||
g := new(DomainMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
|
||||
mph := NewMphMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
mph.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = mph.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Full Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFullMatcherGroup(b *testing.B) {
|
||||
g := new(FullMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMphMatcherGroupFull(b *testing.B) {
|
||||
mph := NewMphMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
mph.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = mph.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Substr Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Index Matchers
|
||||
|
||||
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
||||
g := new(LinearIndexMatcher)
|
||||
for i := 1; i <= 1024; i++ {
|
||||
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
|
||||
common.Must(err)
|
||||
g.Add(m)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
@ -1,13 +1,12 @@
|
||||
package strmatcher
|
||||
|
||||
// LinearIndexMatcher is an implementation of IndexMatcher.
|
||||
// Empty initialization works.
|
||||
type LinearIndexMatcher struct {
|
||||
count uint32
|
||||
fullMatcher FullMatcherGroup
|
||||
domainMatcher DomainMatcherGroup
|
||||
substrMatcher SubstrMatcherGroup
|
||||
otherMatchers SimpleMatcherGroup
|
||||
full *FullMatcherGroup
|
||||
domain *DomainMatcherGroup
|
||||
substr *SubstrMatcherGroup
|
||||
regex *SimpleMatcherGroup
|
||||
}
|
||||
|
||||
func NewLinearIndexMatcher() *LinearIndexMatcher {
|
||||
@ -21,13 +20,25 @@ func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
|
||||
|
||||
switch matcher := matcher.(type) {
|
||||
case FullMatcher:
|
||||
g.fullMatcher.AddFullMatcher(matcher, index)
|
||||
if g.full == nil {
|
||||
g.full = NewFullMatcherGroup()
|
||||
}
|
||||
g.full.AddFullMatcher(matcher, index)
|
||||
case DomainMatcher:
|
||||
g.domainMatcher.AddDomainMatcher(matcher, index)
|
||||
if g.domain == nil {
|
||||
g.domain = NewDomainMatcherGroup()
|
||||
}
|
||||
g.domain.AddDomainMatcher(matcher, index)
|
||||
case SubstrMatcher:
|
||||
g.substrMatcher.AddSubstrMatcher(matcher, index)
|
||||
if g.substr == nil {
|
||||
g.substr = new(SubstrMatcherGroup)
|
||||
}
|
||||
g.substr.AddSubstrMatcher(matcher, index)
|
||||
default:
|
||||
g.otherMatchers.AddMatcher(matcher, index)
|
||||
if g.regex == nil {
|
||||
g.regex = new(SimpleMatcherGroup)
|
||||
}
|
||||
g.regex.AddMatcher(matcher, index)
|
||||
}
|
||||
|
||||
return index
|
||||
@ -40,17 +51,43 @@ func (*LinearIndexMatcher) Build() error {
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *LinearIndexMatcher) Match(input string) []uint32 {
|
||||
result := []uint32{}
|
||||
result = append(result, g.fullMatcher.Match(input)...)
|
||||
result = append(result, g.domainMatcher.Match(input)...)
|
||||
result = append(result, g.substrMatcher.Match(input)...)
|
||||
result = append(result, g.otherMatchers.Match(input)...)
|
||||
return result
|
||||
// Allocate capacity to prevent matches escaping to heap
|
||||
result := make([][]uint32, 0, 5)
|
||||
if g.full != nil {
|
||||
if matches := g.full.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
if g.domain != nil {
|
||||
if matches := g.domain.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
if g.substr != nil {
|
||||
if matches := g.substr.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
if g.regex != nil {
|
||||
if matches := g.regex.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
return CompositeMatches(result)
|
||||
}
|
||||
|
||||
// MatchAny implements IndexMatcher.MatchAny.
|
||||
func (g *LinearIndexMatcher) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
if g.full != nil && g.full.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
if g.domain != nil && g.domain.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
if g.substr != nil && g.substr.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
return g.regex != nil && g.regex.MatchAny(input)
|
||||
}
|
||||
|
||||
// Size implements IndexMatcher.Size.
|
||||
|
@ -8,15 +8,11 @@ type MphIndexMatcher struct {
|
||||
count uint32
|
||||
mph *MphMatcherGroup
|
||||
ac *ACAutomatonMatcherGroup
|
||||
regex SimpleMatcherGroup
|
||||
regex *SimpleMatcherGroup
|
||||
}
|
||||
|
||||
func NewMphIndexMatcher() *MphIndexMatcher {
|
||||
return &MphIndexMatcher{
|
||||
mph: nil,
|
||||
ac: nil,
|
||||
regex: SimpleMatcherGroup{},
|
||||
}
|
||||
return new(MphIndexMatcher)
|
||||
}
|
||||
|
||||
// Add implements IndexMatcher.Add.
|
||||
@ -41,6 +37,9 @@ func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
|
||||
}
|
||||
g.ac.AddSubstrMatcher(matcher, index)
|
||||
case *RegexMatcher:
|
||||
if g.regex == nil {
|
||||
g.regex = &SimpleMatcherGroup{}
|
||||
}
|
||||
g.regex.AddMatcher(matcher, index)
|
||||
}
|
||||
|
||||
@ -59,8 +58,24 @@ func (g *MphIndexMatcher) Build() error {
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (*MphIndexMatcher) Match(string) []uint32 {
|
||||
return nil
|
||||
func (g *MphIndexMatcher) Match(input string) []uint32 {
|
||||
result := make([][]uint32, 0, 5)
|
||||
if g.mph != nil {
|
||||
if matches := g.mph.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
if g.ac != nil {
|
||||
if matches := g.ac.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
if g.regex != nil {
|
||||
if matches := g.regex.Match(input); len(matches) > 0 {
|
||||
result = append(result, matches)
|
||||
}
|
||||
}
|
||||
return CompositeMatches(result)
|
||||
}
|
||||
|
||||
// MatchAny implements IndexMatcher.MatchAny.
|
||||
@ -71,7 +86,7 @@ func (g *MphIndexMatcher) MatchAny(input string) bool {
|
||||
if g.ac != nil && g.ac.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
return g.regex.MatchAny(input)
|
||||
return g.regex != nil && g.regex.MatchAny(input)
|
||||
}
|
||||
|
||||
// Size implements IndexMatcher.Size.
|
||||
|
94
common/strmatcher/indexmatcher_mph_test.go
Normal file
94
common/strmatcher/indexmatcher_mph_test.go
Normal file
@ -0,0 +1,94 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v5/common"
|
||||
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
|
||||
)
|
||||
|
||||
func TestMphIndexMatcher(t *testing.T) {
|
||||
rules := []struct {
|
||||
Type Type
|
||||
Domain string
|
||||
}{
|
||||
{
|
||||
Type: Regex,
|
||||
Domain: "apis\\.us$",
|
||||
},
|
||||
{
|
||||
Type: Substr,
|
||||
Domain: "apis",
|
||||
},
|
||||
{
|
||||
Type: Domain,
|
||||
Domain: "googleapis.com",
|
||||
},
|
||||
{
|
||||
Type: Domain,
|
||||
Domain: "com",
|
||||
},
|
||||
{
|
||||
Type: Full,
|
||||
Domain: "www.baidu.com",
|
||||
},
|
||||
{
|
||||
Type: Substr,
|
||||
Domain: "apis",
|
||||
},
|
||||
{
|
||||
Type: Domain,
|
||||
Domain: "googleapis.com",
|
||||
},
|
||||
{
|
||||
Type: Full,
|
||||
Domain: "fonts.googleapis.com",
|
||||
},
|
||||
{
|
||||
Type: Full,
|
||||
Domain: "www.baidu.com",
|
||||
},
|
||||
{
|
||||
Type: Domain,
|
||||
Domain: "example.com",
|
||||
},
|
||||
}
|
||||
cases := []struct {
|
||||
Input string
|
||||
Output []uint32
|
||||
}{
|
||||
{
|
||||
Input: "www.baidu.com",
|
||||
Output: []uint32{5, 9, 4},
|
||||
},
|
||||
{
|
||||
Input: "fonts.googleapis.com",
|
||||
Output: []uint32{8, 3, 7, 4, 2, 6},
|
||||
},
|
||||
{
|
||||
Input: "example.googleapis.com",
|
||||
Output: []uint32{3, 7, 4, 2, 6},
|
||||
},
|
||||
{
|
||||
Input: "testapis.us",
|
||||
Output: []uint32{2, 6, 1},
|
||||
},
|
||||
{
|
||||
Input: "example.com",
|
||||
Output: []uint32{10, 4},
|
||||
},
|
||||
}
|
||||
matcherGroup := NewMphIndexMatcher()
|
||||
for _, rule := range rules {
|
||||
matcher, err := rule.Type.New(rule.Domain)
|
||||
common.Must(err)
|
||||
matcherGroup.Add(matcher)
|
||||
}
|
||||
matcherGroup.Build()
|
||||
for _, test := range cases {
|
||||
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
@ -127,8 +127,8 @@ func (ac *ACAutomatonMatcherGroup) Build() error {
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
|
||||
var suffixMatches [][]uint32
|
||||
var substrMatches [][]uint32
|
||||
suffixMatches := make([][]uint32, 0, 5)
|
||||
substrMatches := make([][]uint32, 0, 5)
|
||||
fullMatch := true // fullMatch indicates no fail edge traversed so far.
|
||||
node := &ac.nodes[0] // start from root node.
|
||||
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
||||
@ -177,18 +177,10 @@ func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
|
||||
suffixMatches = append(suffixMatches, values[Full])
|
||||
}
|
||||
}
|
||||
switch matches := append(substrMatches, suffixMatches...); len(matches) { // nolint: gocritic
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return matches[0]
|
||||
default:
|
||||
result := []uint32{}
|
||||
for i := len(matches) - 1; i >= 0; i-- {
|
||||
result = append(result, matches[i]...)
|
||||
}
|
||||
return result
|
||||
if len(substrMatches) == 0 {
|
||||
return CompositeMatchesReverse(suffixMatches)
|
||||
}
|
||||
return CompositeMatchesReverse(append(substrMatches, suffixMatches...))
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
|
@ -1,101 +1,109 @@
|
||||
package strmatcher
|
||||
|
||||
import "strings"
|
||||
|
||||
func breakDomain(domain string) []string {
|
||||
return strings.Split(domain, ".")
|
||||
}
|
||||
|
||||
type node struct {
|
||||
type trieNode struct {
|
||||
values []uint32
|
||||
sub map[string]*node
|
||||
children map[string]*trieNode
|
||||
}
|
||||
|
||||
// DomainMatcherGroup is an implementation of MatcherGroup.
|
||||
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
|
||||
type DomainMatcherGroup struct {
|
||||
root *node
|
||||
root *trieNode
|
||||
}
|
||||
|
||||
func NewDomainMatcherGroup() *DomainMatcherGroup {
|
||||
return &DomainMatcherGroup{
|
||||
root: new(trieNode),
|
||||
}
|
||||
}
|
||||
|
||||
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
|
||||
if g.root == nil {
|
||||
g.root = new(node)
|
||||
node := g.root
|
||||
pattern := matcher.Pattern()
|
||||
for i := len(pattern); i > 0; {
|
||||
var part string
|
||||
for j := i - 1; ; j-- {
|
||||
if pattern[j] == '.' {
|
||||
part = pattern[j+1 : i]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
|
||||
current := g.root
|
||||
parts := breakDomain(matcher.Pattern())
|
||||
for i := len(parts) - 1; i >= 0; i-- {
|
||||
part := parts[i]
|
||||
if current.sub == nil {
|
||||
current.sub = make(map[string]*node)
|
||||
if j == 0 {
|
||||
part = pattern[j:i]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
next := current.sub[part]
|
||||
}
|
||||
if node.children == nil {
|
||||
node.children = make(map[string]*trieNode)
|
||||
}
|
||||
next := node.children[part]
|
||||
if next == nil {
|
||||
next = new(node)
|
||||
current.sub[part] = next
|
||||
next = new(trieNode)
|
||||
node.children[part] = next
|
||||
}
|
||||
current = next
|
||||
node = next
|
||||
}
|
||||
|
||||
current.values = append(current.values, value)
|
||||
node.values = append(node.values, value)
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
||||
if domain == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
current := g.root
|
||||
if current == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
nextPart := func(idx int) int {
|
||||
for i := idx - 1; i >= 0; i-- {
|
||||
if domain[i] == '.' {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
matches := [][]uint32{}
|
||||
idx := len(domain)
|
||||
for {
|
||||
if idx == -1 || current.sub == nil {
|
||||
func (g *DomainMatcherGroup) Match(input string) []uint32 {
|
||||
matches := make([][]uint32, 0, 5)
|
||||
node := g.root
|
||||
for i := len(input); i > 0; {
|
||||
for j := i - 1; ; j-- {
|
||||
if input[j] == '.' { // Domain label found
|
||||
node = node.children[input[j+1:i]]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
|
||||
nidx := nextPart(idx)
|
||||
part := domain[nidx+1 : idx]
|
||||
next := current.sub[part]
|
||||
if next == nil {
|
||||
if j == 0 { // The last part of domain label
|
||||
node = node.children[input[j:i]]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
current = next
|
||||
idx = nidx
|
||||
if len(current.values) > 0 {
|
||||
matches = append(matches, current.values)
|
||||
}
|
||||
if node == nil { // No more match if no trie edge transition
|
||||
break
|
||||
}
|
||||
if len(node.values) > 0 { // Found matched matchers
|
||||
matches = append(matches, node.values)
|
||||
}
|
||||
if node.children == nil { // No more match if leaf node reached
|
||||
break
|
||||
}
|
||||
}
|
||||
switch len(matches) {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return matches[0]
|
||||
default:
|
||||
result := []uint32{}
|
||||
for idx := range matches {
|
||||
// Insert reversely, the subdomain that matches further ranks higher
|
||||
result = append(result, matches[len(matches)-1-idx]...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
return CompositeMatchesReverse(matches)
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *DomainMatcherGroup) MatchAny(domain string) bool {
|
||||
return len(g.Match(domain)) > 0
|
||||
func (g *DomainMatcherGroup) MatchAny(input string) bool {
|
||||
node := g.root
|
||||
for i := len(input); i > 0; {
|
||||
for j := i - 1; ; j-- {
|
||||
if input[j] == '.' {
|
||||
node = node.children[input[j+1:i]]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
if j == 0 {
|
||||
node = node.children[input[j:i]]
|
||||
i = j
|
||||
break
|
||||
}
|
||||
}
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
if len(node.values) > 0 {
|
||||
return true
|
||||
}
|
||||
if node.children == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ func TestDomainMatcherGroup(t *testing.T) {
|
||||
Result: []uint32{4, 6},
|
||||
},
|
||||
}
|
||||
g := new(DomainMatcherGroup)
|
||||
g := NewDomainMatcherGroup()
|
||||
for _, pattern := range patterns {
|
||||
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
|
||||
}
|
||||
@ -95,7 +95,7 @@ func TestDomainMatcherGroup(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEmptyDomainMatcherGroup(t *testing.T) {
|
||||
g := new(DomainMatcherGroup)
|
||||
g := NewDomainMatcherGroup()
|
||||
r := g.Match("v2fly.org")
|
||||
if len(r) != 0 {
|
||||
t.Error("Expect [], but ", r)
|
||||
|
@ -6,25 +6,25 @@ type FullMatcherGroup struct {
|
||||
matchers map[string][]uint32
|
||||
}
|
||||
|
||||
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
||||
if g.matchers == nil {
|
||||
g.matchers = make(map[string][]uint32)
|
||||
func NewFullMatcherGroup() *FullMatcherGroup {
|
||||
return &FullMatcherGroup{
|
||||
matchers: make(map[string][]uint32),
|
||||
}
|
||||
}
|
||||
|
||||
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
||||
domain := matcher.Pattern()
|
||||
g.matchers[domain] = append(g.matchers[domain], value)
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *FullMatcherGroup) Match(input string) []uint32 {
|
||||
if g.matchers == nil {
|
||||
return nil
|
||||
}
|
||||
return g.matchers[input]
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *FullMatcherGroup) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
_, found := g.matchers[input]
|
||||
return found
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ func TestFullMatcherGroup(t *testing.T) {
|
||||
Result: []uint32{4, 6},
|
||||
},
|
||||
}
|
||||
g := new(FullMatcherGroup)
|
||||
g := NewFullMatcherGroup()
|
||||
for _, pattern := range patterns {
|
||||
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
|
||||
}
|
||||
@ -63,7 +63,7 @@ func TestFullMatcherGroup(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEmptyFullMatcherGroup(t *testing.T) {
|
||||
g := new(FullMatcherGroup)
|
||||
g := NewFullMatcherGroup()
|
||||
r := g.Match("v2fly.org")
|
||||
if len(r) != 0 {
|
||||
t.Error("Expect [], but ", r)
|
||||
|
@ -152,7 +152,7 @@ func (g *MphMatcherGroup) Lookup(rollingHash uint32, input string) uint32 {
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *MphMatcherGroup) Match(input string) []uint32 {
|
||||
matches := [][]uint32{}
|
||||
matches := make([][]uint32, 0, 5)
|
||||
hash := uint32(0)
|
||||
for i := len(input) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(input[i])
|
||||
@ -165,18 +165,7 @@ func (g *MphMatcherGroup) Match(input string) []uint32 {
|
||||
if mphIdx := g.Lookup(hash, input); mphIdx != 0 {
|
||||
matches = append(matches, g.values[mphIdx])
|
||||
}
|
||||
switch len(matches) {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return matches[0]
|
||||
default:
|
||||
result := []uint32{}
|
||||
for i := len(matches) - 1; i >= 0; i-- {
|
||||
result = append(result, matches[i]...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
return CompositeMatchesReverse(matches)
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
|
@ -32,5 +32,10 @@ func (g *SimpleMatcherGroup) Match(input string) []uint32 {
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
for _, e := range g.matchers {
|
||||
if e.matcher.Match(input) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@ -20,16 +20,30 @@ func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint3
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
|
||||
result := []uint32{}
|
||||
var result []uint32
|
||||
for i, pattern := range g.patterns {
|
||||
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
|
||||
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
|
||||
}
|
||||
}
|
||||
// sort.Slice will trigger allocation no matter what input is. See https://github.com/golang/go/issues/17332
|
||||
// We special-case the sorting by result length to avoid memory allocation as much as possible.
|
||||
switch len(result) {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
// No need to sort
|
||||
case 2:
|
||||
// Do a simple swap if unsorted
|
||||
if result[0] > result[1] {
|
||||
result[0], result[1] = result[1], result[0]
|
||||
}
|
||||
default:
|
||||
// Sort the match results in dictionary order, so that:
|
||||
// 1. Pattern matched at smaller position (meaning matched further) takes precedence.
|
||||
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
|
||||
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
||||
}
|
||||
for i, entry := range result {
|
||||
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"errors"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// FullMatcher is an implementation of Matcher.
|
||||
@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) {
|
||||
case Substr:
|
||||
return SubstrMatcher(pattern), nil
|
||||
case Domain:
|
||||
pattern, err := ToDomain(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return DomainMatcher(pattern), nil
|
||||
case Regex: // 1. regex matching is case-sensitive
|
||||
regex, err := regexp.Compile(pattern)
|
||||
@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) {
|
||||
}
|
||||
return &RegexMatcher{pattern: regex}, nil
|
||||
default:
|
||||
panic("Unknown type")
|
||||
return nil, errors.New("unknown matcher type")
|
||||
}
|
||||
}
|
||||
|
||||
// NewDomainPattern creates a new Matcher based on the given domain pattern.
|
||||
// It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
|
||||
func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
|
||||
switch t {
|
||||
case Full:
|
||||
pattern, err := ToDomain(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return FullMatcher(pattern), nil
|
||||
case Substr:
|
||||
pattern, err := ToDomain(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return SubstrMatcher(pattern), nil
|
||||
case Domain:
|
||||
pattern, err := ToDomain(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return DomainMatcher(pattern), nil
|
||||
case Regex: // Regex's charset not in LDH subset
|
||||
regex, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &RegexMatcher{pattern: regex}, nil
|
||||
default:
|
||||
return nil, errors.New("unknown matcher type")
|
||||
}
|
||||
}
|
||||
|
||||
// ToDomain converts input pattern to a domain string, and returns an error if such a conversion cannot be made.
//  1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
//     * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers)
//     * Digits 0 to 9
//     * Hyphens(-) and Periods(.)
//  2. Non-ASCII characters not supported for now.
//     * May support Internationalized domain name to Punycode if needed in the future.
//
// The pattern is scanned byte by byte; the first offending byte decides which
// error is returned. An empty pattern is accepted and returned as-is.
func ToDomain(pattern string) (string, error) {
	for i := 0; i < len(pattern); i++ {
		c := pattern[i]
		switch {
		case c >= utf8.RuneSelf:
			return "", errors.New("non-ASCII characters not supported for now")
		case 'A' <= c && c <= 'Z',
			'a' <= c && c <= 'z',
			'0' <= c && c <= '9',
			c == '-',
			c == '.':
			// Allowed LDH byte.
		default:
			return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
		}
	}
	// The pattern is now known to be pure ASCII, so ToLower only folds 'A'-'Z'.
	// It also hands back the input unchanged when there is nothing to fold,
	// which avoids allocating for already-lowercase patterns.
	return strings.ToLower(pattern), nil
}
|
||||
|
||||
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
||||
type MatcherGroupForAll interface {
|
||||
AddMatcher(matcher Matcher, value uint32)
|
||||
@ -137,6 +205,10 @@ type MatcherGroupForRegex interface {
|
||||
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
||||
// This function is provided to help writing code to test a MatcherGroup.
|
||||
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
||||
if g, ok := g.(IndexMatcher); ok {
|
||||
g.Add(matcher)
|
||||
return nil
|
||||
}
|
||||
if g, ok := g.(MatcherGroupForAll); ok {
|
||||
g.AddMatcher(matcher, value)
|
||||
return nil
|
||||
@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
||||
}
|
||||
return errors.New("cannot add matcher to matcher group")
|
||||
}
|
||||
|
||||
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
// It is designed to avoid new memory allocation as much as possible:
//   - With no inner slices it returns nil.
//   - With exactly one inner slice it returns that slice itself (no copy, so the
//     result shares its backing array with the input).
//   - Otherwise it concatenates all inner slices in order into one new slice that
//     is allocated once, sized to the exact total length.
func CompositeMatches(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	}

	// Pre-compute the final length so append never has to grow the slice.
	total := 0
	for _, m := range matches {
		total += len(m)
	}
	result := make([]uint32, 0, total)
	for _, m := range matches {
		result = append(result, m...)
	}
	return result
}
|
||||
|
||||
// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice.
// It is designed that:
//  1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher.
//  2. Indices in the same matcher keep their original order.
//  3. New memory allocation is avoided as much as possible: no inner slices yields nil,
//     exactly one inner slice is returned as-is (no copy), and otherwise a single
//     result slice is allocated once, sized to the exact total length.
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	}

	// Pre-compute the final length so append never has to grow the slice.
	total := 0
	for _, m := range matches {
		total += len(m)
	}
	result := make([]uint32, 0, total)
	// Walk the groups back to front; each group's own order is preserved.
	for i := len(matches) - 1; i >= 0; i-- {
		result = append(result, matches[i]...)
	}
	return result
}
|
||||
|
Loading…
Reference in New Issue
Block a user