1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2025-01-05 00:47:51 -05:00

feat: Implement Match and MatchAny for all MatcherGroup, IndexMatcher

[common/strmatcher] Implement Match and MatchAny for all MatcherGroup and IndexMatcher
This commit is contained in:
Vigilans 2022-09-16 14:40:03 +08:00 committed by GitHub
parent 2e0ea88041
commit f494df2567
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 612 additions and 303 deletions

View File

@ -0,0 +1,58 @@
package strmatcher_test
import (
"testing"
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
)
func BenchmarkLinearIndexMatcher(b *testing.B) {
benchmarkIndexMatcher(b, func() IndexMatcher {
return NewLinearIndexMatcher()
})
}
func BenchmarkMphIndexMatcher(b *testing.B) {
benchmarkIndexMatcher(b, func() IndexMatcher {
return NewMphIndexMatcher()
})
}
// benchmarkIndexMatcher runs the "Match", "Match/Dotless" and "MatchAny"
// benchmark groups. Every sub-benchmark obtains a fresh matcher from ctor and
// populates it according to a map from matcher type to whether that type
// should be given its extra "matching" pattern (see prepareMatchers).
func benchmarkIndexMatcher(b *testing.B, ctor func() IndexMatcher) {
	type scenario struct {
		name  string
		types map[Type]bool
	}
	suites := []struct {
		name      string
		run       func(*testing.B, MatcherGroup, map[Type]bool)
		scenarios []scenario
	}{
		{
			name: "Match",
			run:  benchmarkMatch,
			scenarios: []scenario{
				{"Domain------------", map[Type]bool{Domain: true}},
				{"Domain+Full-------", map[Type]bool{Domain: true, Full: true}},
				{"Domain+Full+Substr", map[Type]bool{Domain: true, Full: true, Substr: true}},
				{"All-Fail----------", map[Type]bool{Domain: false, Full: false, Substr: false}},
			},
		},
		{
			// Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
			name: "Match/Dotless",
			run:  benchmarkMatch,
			scenarios: []scenario{
				{"All-Succ", map[Type]bool{Domain: true, Full: true, Substr: true, Regex: true}},
				{"All-Fail", map[Type]bool{Domain: false, Full: false, Substr: false, Regex: false}},
			},
		},
		{
			name: "MatchAny",
			run:  benchmarkMatchAny,
			scenarios: []scenario{
				{"First-Full--", map[Type]bool{Full: true, Domain: true, Substr: true}},
				{"First-Domain", map[Type]bool{Full: false, Domain: true, Substr: true}},
				{"First-Substr", map[Type]bool{Full: false, Domain: false, Substr: true}},
				{"All-Fail----", map[Type]bool{Full: false, Domain: false, Substr: false}},
			},
		},
	}
	for _, suite := range suites {
		suite := suite // keep a per-iteration copy for the closures below
		b.Run(suite.name, func(b *testing.B) {
			for _, sc := range suite.scenarios {
				sc := sc
				b.Run(sc.name, func(b *testing.B) {
					suite.run(b, ctor(), sc.types)
				})
			}
		})
	}
}

View File

@ -0,0 +1,149 @@
package strmatcher_test
import (
"strconv"
"testing"
"github.com/v2fly/v2ray-core/v5/common"
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
)
func BenchmarkFullMatcher(b *testing.B) {
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
benchmarkMatcherType(b, Full, func() MatcherGroup {
return new(SimpleMatcherGroup)
})
})
b.Run("FullMatcherGroup--------", func(b *testing.B) {
benchmarkMatcherType(b, Full, func() MatcherGroup {
return NewFullMatcherGroup()
})
})
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
benchmarkMatcherType(b, Full, func() MatcherGroup {
return NewACAutomatonMatcherGroup()
})
})
b.Run("MphMatcherGroup---------", func(b *testing.B) {
benchmarkMatcherType(b, Full, func() MatcherGroup {
return NewMphMatcherGroup()
})
})
}
func BenchmarkDomainMatcher(b *testing.B) {
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
benchmarkMatcherType(b, Domain, func() MatcherGroup {
return new(SimpleMatcherGroup)
})
})
b.Run("DomainMatcherGroup------", func(b *testing.B) {
benchmarkMatcherType(b, Domain, func() MatcherGroup {
return NewDomainMatcherGroup()
})
})
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
benchmarkMatcherType(b, Domain, func() MatcherGroup {
return NewACAutomatonMatcherGroup()
})
})
b.Run("MphMatcherGroup---------", func(b *testing.B) {
benchmarkMatcherType(b, Domain, func() MatcherGroup {
return NewMphMatcherGroup()
})
})
}
func BenchmarkSubstrMatcher(b *testing.B) {
b.Run("SimpleMatcherGroup------", func(b *testing.B) {
benchmarkMatcherType(b, Substr, func() MatcherGroup {
return new(SimpleMatcherGroup)
})
})
b.Run("SubstrMatcherGroup------", func(b *testing.B) {
benchmarkMatcherType(b, Substr, func() MatcherGroup {
return new(SubstrMatcherGroup)
})
})
b.Run("ACAutomationMatcherGroup", func(b *testing.B) {
benchmarkMatcherType(b, Substr, func() MatcherGroup {
return NewACAutomatonMatcherGroup()
})
})
}
// Utility functions shared by the benchmarks.
// benchmarkMatcherType benchmarks Match and MatchAny for a single matcher type t.
// Each of the four sub-benchmarks gets a fresh group from ctor; the "Succ"
// variants enable the type with its extra pattern (t: true), the "Fail"
// variants enable it without (t: false).
func benchmarkMatcherType(b *testing.B, t Type, ctor func() MatcherGroup) {
	withMatch := map[Type]bool{t: true}
	withoutMatch := map[Type]bool{t: false}

	b.Run("Match", func(b *testing.B) {
		b.Run("Succ", func(b *testing.B) { benchmarkMatch(b, ctor(), withMatch) })
		b.Run("Fail", func(b *testing.B) { benchmarkMatch(b, ctor(), withoutMatch) })
	})
	b.Run("MatchAny", func(b *testing.B) {
		b.Run("Succ", func(b *testing.B) { benchmarkMatchAny(b, ctor(), withMatch) })
		b.Run("Fail", func(b *testing.B) { benchmarkMatchAny(b, ctor(), withoutMatch) })
	})
}
// benchmarkMatch populates g via prepareMatchers, then times b.N calls of
// g.Match on a fixed input. Setup is excluded from the timing by ResetTimer.
func benchmarkMatch(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
	const input = "0.v2fly.org"
	prepareMatchers(g, enabledTypes)
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = g.Match(input)
	}
}
// benchmarkMatchAny populates g via prepareMatchers, then times b.N calls of
// g.MatchAny on a fixed input. Setup is excluded from the timing by ResetTimer.
func benchmarkMatchAny(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
	const input = "0.v2fly.org"
	prepareMatchers(g, enabledTypes)
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = g.MatchAny(input)
	}
}
// prepareMatchers fills g with benchmark fixtures for every matcher type that
// appears as a key in enabledTypes:
//   - Domain: "1.v2fly.org" .. "1023.v2fly.org", plus "v2fly.org" when the value is true.
//   - Full:   "1.v2fly.org" .. "63.v2fly.org", plus "0.v2fly.org" when the value is true.
//   - Substr: "1.v2fly.org" .. "3.v2fly.org", plus "v2fly.org" when the value is true.
//   - Regex:  the single pattern "^[^.]*$", added regardless of the value.
//
// If g has a Build method it is called once all matchers are added.
// Every error from adding or building is passed to common.Must instead of
// being dropped, so a group that rejects a matcher type cannot silently be
// benchmarked while empty.
//
// Types are visited in map iteration order, which is not deterministic.
func prepareMatchers(g MatcherGroup, enabledTypes map[Type]bool) {
	for matcherType, hasMatch := range enabledTypes {
		switch matcherType {
		case Domain:
			if hasMatch {
				common.Must(AddMatcherToGroup(g, DomainMatcher("v2fly.org"), 0))
			}
			for i := 1; i < 1024; i++ {
				common.Must(AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
			}
		case Full:
			if hasMatch {
				common.Must(AddMatcherToGroup(g, FullMatcher("0.v2fly.org"), 0))
			}
			for i := 1; i < 64; i++ {
				common.Must(AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
			}
		case Substr:
			if hasMatch {
				common.Must(AddMatcherToGroup(g, SubstrMatcher("v2fly.org"), 0))
			}
			for i := 1; i < 4; i++ {
				common.Must(AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
			}
		case Regex:
			matcher, err := Regex.New("^[^.]*$") // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
			common.Must(err)
			common.Must(AddMatcherToGroup(g, matcher, 0))
		}
	}
	if g, ok := g.(buildable); ok {
		common.Must(g.Build())
	}
}
// buildable is satisfied by matcher groups that expose a Build step.
// prepareMatchers calls Build on such groups after all matchers are added.
type buildable interface {
Build() error
}

View File

@ -1,161 +0,0 @@
package strmatcher_test
import (
"strconv"
"testing"
"github.com/v2fly/v2ray-core/v5/common"
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
)
// Benchmark Domain Matcher Groups
// BenchmarkSimpleMatcherGroupForDomain times SimpleMatcherGroup.Match on "0.v2fly.org"
// with domain matchers "1.v2fly.org" through "1024.v2fly.org" registered.
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
	const probe = "0.v2fly.org"
	group := new(SimpleMatcherGroup)
	for n := uint32(1); n <= 1024; n++ {
		pattern := strconv.Itoa(int(n)) + ".v2fly.org"
		AddMatcherToGroup(group, DomainMatcher(pattern), n)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = group.Match(probe)
	}
}
// BenchmarkDomainMatcherGroup times DomainMatcherGroup.Match on "0.v2fly.org"
// with domain matchers "1.v2fly.org" through "1024.v2fly.org" registered.
func BenchmarkDomainMatcherGroup(b *testing.B) {
	const probe = "0.v2fly.org"
	group := &DomainMatcherGroup{}
	for n := uint32(1); n <= 1024; n++ {
		pattern := strconv.Itoa(int(n)) + ".v2fly.org"
		AddMatcherToGroup(group, DomainMatcher(pattern), n)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = group.Match(probe)
	}
}
// BenchmarkACAutomatonMatcherGroupForDomain times ACAutomatonMatcherGroup.MatchAny on
// "0.v2fly.org" with domain matchers "1.v2fly.org" through "1024.v2fly.org" registered.
// Errors from adding matchers and from Build are passed to common.Must rather than
// dropped, so the timed loop never runs against a group whose setup failed.
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
	ac := NewACAutomatonMatcherGroup()
	for i := 1; i <= 1024; i++ {
		common.Must(AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
	}
	common.Must(ac.Build())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = ac.MatchAny("0.v2fly.org")
	}
}
// BenchmarkMphMatcherGroupForDomain times MphMatcherGroup.MatchAny on "0.v2fly.org"
// with domain matchers "1.v2fly.org" through "1024.v2fly.org" registered.
// Errors from adding matchers and from Build are passed to common.Must rather than
// dropped, so the timed loop never runs against a group whose setup failed.
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
	mph := NewMphMatcherGroup()
	for i := 1; i <= 1024; i++ {
		common.Must(AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
	}
	common.Must(mph.Build())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = mph.MatchAny("0.v2fly.org")
	}
}
// Benchmark Full Matcher Groups
// BenchmarkSimpleMatcherGroupForFull times SimpleMatcherGroup.Match on "0.v2fly.org"
// with full matchers "1.v2fly.org" through "1024.v2fly.org" registered.
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
	const probe = "0.v2fly.org"
	group := new(SimpleMatcherGroup)
	for n := uint32(1); n <= 1024; n++ {
		pattern := strconv.Itoa(int(n)) + ".v2fly.org"
		AddMatcherToGroup(group, FullMatcher(pattern), n)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = group.Match(probe)
	}
}
// BenchmarkFullMatcherGroup times FullMatcherGroup.Match on "0.v2fly.org"
// with full matchers "1.v2fly.org" through "1024.v2fly.org" registered.
func BenchmarkFullMatcherGroup(b *testing.B) {
	const probe = "0.v2fly.org"
	group := &FullMatcherGroup{}
	for n := uint32(1); n <= 1024; n++ {
		pattern := strconv.Itoa(int(n)) + ".v2fly.org"
		AddMatcherToGroup(group, FullMatcher(pattern), n)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = group.Match(probe)
	}
}
// BenchmarkACAutomatonMatcherGroupForFull times ACAutomatonMatcherGroup.MatchAny on
// "0.v2fly.org" with full matchers "1.v2fly.org" through "1024.v2fly.org" registered.
// Errors from adding matchers and from Build are passed to common.Must rather than
// dropped, so the timed loop never runs against a group whose setup failed.
func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
	ac := NewACAutomatonMatcherGroup()
	for i := 1; i <= 1024; i++ {
		common.Must(AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
	}
	common.Must(ac.Build())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = ac.MatchAny("0.v2fly.org")
	}
}
// BenchmarkMphMatcherGroupFull times MphMatcherGroup.MatchAny on "0.v2fly.org"
// with full matchers "1.v2fly.org" through "1024.v2fly.org" registered.
// Errors from adding matchers and from Build are passed to common.Must rather than
// dropped, so the timed loop never runs against a group whose setup failed.
func BenchmarkMphMatcherGroupFull(b *testing.B) {
	mph := NewMphMatcherGroup()
	for i := 1; i <= 1024; i++ {
		common.Must(AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
	}
	common.Must(mph.Build())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = mph.MatchAny("0.v2fly.org")
	}
}
// Benchmark Substr Matcher Groups
// BenchmarkSimpleMatcherGroupForSubstr times SimpleMatcherGroup.Match on "0.v2fly.org"
// with substring matchers "1.v2fly.org" through "1024.v2fly.org" registered.
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
	const probe = "0.v2fly.org"
	group := new(SimpleMatcherGroup)
	for n := uint32(1); n <= 1024; n++ {
		pattern := strconv.Itoa(int(n)) + ".v2fly.org"
		AddMatcherToGroup(group, SubstrMatcher(pattern), n)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = group.Match(probe)
	}
}
// BenchmarkACAutomatonMatcherGroupForSubstr times ACAutomatonMatcherGroup.MatchAny on
// "0.v2fly.org" with substring matchers "1.v2fly.org" through "1024.v2fly.org" registered.
// Errors from adding matchers and from Build are passed to common.Must rather than
// dropped, so the timed loop never runs against a group whose setup failed.
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
	ac := NewACAutomatonMatcherGroup()
	for i := 1; i <= 1024; i++ {
		common.Must(AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)))
	}
	common.Must(ac.Build())
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = ac.MatchAny("0.v2fly.org")
	}
}
// Benchmark Index Matchers
// BenchmarkLinearIndexMatcher times LinearIndexMatcher.Match on "0.v2fly.org"
// with Domain matchers built from "1.v2fly.org" through "1024.v2fly.org" added.
func BenchmarkLinearIndexMatcher(b *testing.B) {
	const probe = "0.v2fly.org"
	index := &LinearIndexMatcher{}
	for n := 1; n <= 1024; n++ {
		pattern := strconv.Itoa(n) + ".v2fly.org"
		matcher, err := Domain.New(pattern)
		common.Must(err)
		index.Add(matcher)
	}
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_ = index.Match(probe)
	}
}

View File

@ -1,13 +1,12 @@
package strmatcher package strmatcher
// LinearIndexMatcher is an implementation of IndexMatcher. // LinearIndexMatcher is an implementation of IndexMatcher.
// Empty initialization works.
type LinearIndexMatcher struct { type LinearIndexMatcher struct {
count uint32 count uint32
fullMatcher FullMatcherGroup full *FullMatcherGroup
domainMatcher DomainMatcherGroup domain *DomainMatcherGroup
substrMatcher SubstrMatcherGroup substr *SubstrMatcherGroup
otherMatchers SimpleMatcherGroup regex *SimpleMatcherGroup
} }
func NewLinearIndexMatcher() *LinearIndexMatcher { func NewLinearIndexMatcher() *LinearIndexMatcher {
@ -21,13 +20,25 @@ func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
switch matcher := matcher.(type) { switch matcher := matcher.(type) {
case FullMatcher: case FullMatcher:
g.fullMatcher.AddFullMatcher(matcher, index) if g.full == nil {
g.full = NewFullMatcherGroup()
}
g.full.AddFullMatcher(matcher, index)
case DomainMatcher: case DomainMatcher:
g.domainMatcher.AddDomainMatcher(matcher, index) if g.domain == nil {
g.domain = NewDomainMatcherGroup()
}
g.domain.AddDomainMatcher(matcher, index)
case SubstrMatcher: case SubstrMatcher:
g.substrMatcher.AddSubstrMatcher(matcher, index) if g.substr == nil {
g.substr = new(SubstrMatcherGroup)
}
g.substr.AddSubstrMatcher(matcher, index)
default: default:
g.otherMatchers.AddMatcher(matcher, index) if g.regex == nil {
g.regex = new(SimpleMatcherGroup)
}
g.regex.AddMatcher(matcher, index)
} }
return index return index
@ -40,17 +51,43 @@ func (*LinearIndexMatcher) Build() error {
// Match implements IndexMatcher.Match. // Match implements IndexMatcher.Match.
func (g *LinearIndexMatcher) Match(input string) []uint32 { func (g *LinearIndexMatcher) Match(input string) []uint32 {
result := []uint32{} // Allocate capacity to prevent matches escaping to heap
result = append(result, g.fullMatcher.Match(input)...) result := make([][]uint32, 0, 5)
result = append(result, g.domainMatcher.Match(input)...) if g.full != nil {
result = append(result, g.substrMatcher.Match(input)...) if matches := g.full.Match(input); len(matches) > 0 {
result = append(result, g.otherMatchers.Match(input)...) result = append(result, matches)
return result }
}
if g.domain != nil {
if matches := g.domain.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
if g.substr != nil {
if matches := g.substr.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
if g.regex != nil {
if matches := g.regex.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
return CompositeMatches(result)
} }
// MatchAny implements IndexMatcher.MatchAny. // MatchAny implements IndexMatcher.MatchAny.
func (g *LinearIndexMatcher) MatchAny(input string) bool { func (g *LinearIndexMatcher) MatchAny(input string) bool {
return len(g.Match(input)) > 0 if g.full != nil && g.full.MatchAny(input) {
return true
}
if g.domain != nil && g.domain.MatchAny(input) {
return true
}
if g.substr != nil && g.substr.MatchAny(input) {
return true
}
return g.regex != nil && g.regex.MatchAny(input)
} }
// Size implements IndexMatcher.Size. // Size implements IndexMatcher.Size.

View File

@ -8,15 +8,11 @@ type MphIndexMatcher struct {
count uint32 count uint32
mph *MphMatcherGroup mph *MphMatcherGroup
ac *ACAutomatonMatcherGroup ac *ACAutomatonMatcherGroup
regex SimpleMatcherGroup regex *SimpleMatcherGroup
} }
func NewMphIndexMatcher() *MphIndexMatcher { func NewMphIndexMatcher() *MphIndexMatcher {
return &MphIndexMatcher{ return new(MphIndexMatcher)
mph: nil,
ac: nil,
regex: SimpleMatcherGroup{},
}
} }
// Add implements IndexMatcher.Add. // Add implements IndexMatcher.Add.
@ -41,6 +37,9 @@ func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
} }
g.ac.AddSubstrMatcher(matcher, index) g.ac.AddSubstrMatcher(matcher, index)
case *RegexMatcher: case *RegexMatcher:
if g.regex == nil {
g.regex = &SimpleMatcherGroup{}
}
g.regex.AddMatcher(matcher, index) g.regex.AddMatcher(matcher, index)
} }
@ -59,8 +58,24 @@ func (g *MphIndexMatcher) Build() error {
} }
// Match implements IndexMatcher.Match. // Match implements IndexMatcher.Match.
func (*MphIndexMatcher) Match(string) []uint32 { func (g *MphIndexMatcher) Match(input string) []uint32 {
return nil result := make([][]uint32, 0, 5)
if g.mph != nil {
if matches := g.mph.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
if g.ac != nil {
if matches := g.ac.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
if g.regex != nil {
if matches := g.regex.Match(input); len(matches) > 0 {
result = append(result, matches)
}
}
return CompositeMatches(result)
} }
// MatchAny implements IndexMatcher.MatchAny. // MatchAny implements IndexMatcher.MatchAny.
@ -71,7 +86,7 @@ func (g *MphIndexMatcher) MatchAny(input string) bool {
if g.ac != nil && g.ac.MatchAny(input) { if g.ac != nil && g.ac.MatchAny(input) {
return true return true
} }
return g.regex.MatchAny(input) return g.regex != nil && g.regex.MatchAny(input)
} }
// Size implements IndexMatcher.Size. // Size implements IndexMatcher.Size.

View File

@ -0,0 +1,94 @@
package strmatcher_test
import (
"reflect"
"testing"
"github.com/v2fly/v2ray-core/v5/common"
. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
)
// TestMphIndexMatcher adds a mixed list of Regex, Substr, Domain and Full rules
// to an MphIndexMatcher and checks, for several inputs, the exact slice
// returned by Match (values and order). The expected values are the rules'
// 1-based positions in the list below.
func TestMphIndexMatcher(t *testing.T) {
	rules := []struct {
		Type   Type
		Domain string
	}{
		{
			Type:   Regex,
			Domain: "apis\\.us$",
		},
		{
			Type:   Substr,
			Domain: "apis",
		},
		{
			Type:   Domain,
			Domain: "googleapis.com",
		},
		{
			Type:   Domain,
			Domain: "com",
		},
		{
			Type:   Full,
			Domain: "www.baidu.com",
		},
		{
			Type:   Substr,
			Domain: "apis",
		},
		{
			Type:   Domain,
			Domain: "googleapis.com",
		},
		{
			Type:   Full,
			Domain: "fonts.googleapis.com",
		},
		{
			Type:   Full,
			Domain: "www.baidu.com",
		},
		{
			Type:   Domain,
			Domain: "example.com",
		},
	}
	cases := []struct {
		Input  string
		Output []uint32
	}{
		{
			Input:  "www.baidu.com",
			Output: []uint32{5, 9, 4},
		},
		{
			Input:  "fonts.googleapis.com",
			Output: []uint32{8, 3, 7, 4, 2, 6},
		},
		{
			Input:  "example.googleapis.com",
			Output: []uint32{3, 7, 4, 2, 6},
		},
		{
			Input:  "testapis.us",
			Output: []uint32{2, 6, 1},
		},
		{
			Input:  "example.com",
			Output: []uint32{10, 4},
		},
	}
	matcherGroup := NewMphIndexMatcher()
	for _, rule := range rules {
		matcher, err := rule.Type.New(rule.Domain)
		common.Must(err)
		matcherGroup.Add(matcher)
	}
	// A failed build must stop the test here instead of surfacing later as
	// unexplained match mismatches.
	common.Must(matcherGroup.Build())
	for _, test := range cases {
		if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
			t.Error("unexpected output: ", m, " for test case ", test)
		}
	}
}

View File

@ -127,8 +127,8 @@ func (ac *ACAutomatonMatcherGroup) Build() error {
// Match implements MatcherGroup.Match. // Match implements MatcherGroup.Match.
func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 { func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
var suffixMatches [][]uint32 suffixMatches := make([][]uint32, 0, 5)
var substrMatches [][]uint32 substrMatches := make([][]uint32, 0, 5)
fullMatch := true // fullMatch indicates no fail edge traversed so far. fullMatch := true // fullMatch indicates no fail edge traversed so far.
node := &ac.nodes[0] // start from root node. node := &ac.nodes[0] // start from root node.
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN // 1. the match string is all through trie edge. FULL MATCH or DOMAIN
@ -177,18 +177,10 @@ func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
suffixMatches = append(suffixMatches, values[Full]) suffixMatches = append(suffixMatches, values[Full])
} }
} }
switch matches := append(substrMatches, suffixMatches...); len(matches) { // nolint: gocritic if len(substrMatches) == 0 {
case 0: return CompositeMatchesReverse(suffixMatches)
return nil
case 1:
return matches[0]
default:
result := []uint32{}
for i := len(matches) - 1; i >= 0; i-- {
result = append(result, matches[i]...)
}
return result
} }
return CompositeMatchesReverse(append(substrMatches, suffixMatches...))
} }
// MatchAny implements MatcherGroup.MatchAny. // MatchAny implements MatcherGroup.MatchAny.

View File

@ -1,101 +1,109 @@
package strmatcher package strmatcher
import "strings" type trieNode struct {
func breakDomain(domain string) []string {
return strings.Split(domain, ".")
}
type node struct {
values []uint32 values []uint32
sub map[string]*node children map[string]*trieNode
} }
// DomainMatcherGroup is an implementation of MatcherGroup. // DomainMatcherGroup is an implementation of MatcherGroup.
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based. // It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
type DomainMatcherGroup struct { type DomainMatcherGroup struct {
root *node root *trieNode
}
func NewDomainMatcherGroup() *DomainMatcherGroup {
return &DomainMatcherGroup{
root: new(trieNode),
}
} }
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher. // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) { func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
if g.root == nil { node := g.root
g.root = new(node) pattern := matcher.Pattern()
for i := len(pattern); i > 0; {
var part string
for j := i - 1; ; j-- {
if pattern[j] == '.' {
part = pattern[j+1 : i]
i = j
break
} }
if j == 0 {
current := g.root part = pattern[j:i]
parts := breakDomain(matcher.Pattern()) i = j
for i := len(parts) - 1; i >= 0; i-- { break
part := parts[i]
if current.sub == nil {
current.sub = make(map[string]*node)
} }
next := current.sub[part] }
if node.children == nil {
node.children = make(map[string]*trieNode)
}
next := node.children[part]
if next == nil { if next == nil {
next = new(node) next = new(trieNode)
current.sub[part] = next node.children[part] = next
} }
current = next node = next
} }
current.values = append(current.values, value) node.values = append(node.values, value)
} }
// Match implements MatcherGroup.Match. // Match implements MatcherGroup.Match.
func (g *DomainMatcherGroup) Match(domain string) []uint32 { func (g *DomainMatcherGroup) Match(input string) []uint32 {
if domain == "" { matches := make([][]uint32, 0, 5)
return nil node := g.root
} for i := len(input); i > 0; {
for j := i - 1; ; j-- {
current := g.root if input[j] == '.' { // Domain label found
if current == nil { node = node.children[input[j+1:i]]
return nil i = j
}
nextPart := func(idx int) int {
for i := idx - 1; i >= 0; i-- {
if domain[i] == '.' {
return i
}
}
return -1
}
matches := [][]uint32{}
idx := len(domain)
for {
if idx == -1 || current.sub == nil {
break break
} }
if j == 0 { // The last part of domain label
nidx := nextPart(idx) node = node.children[input[j:i]]
part := domain[nidx+1 : idx] i = j
next := current.sub[part]
if next == nil {
break break
} }
current = next }
idx = nidx if node == nil { // No more match if no trie edge transition
if len(current.values) > 0 { break
matches = append(matches, current.values) }
if len(node.values) > 0 { // Found matched matchers
matches = append(matches, node.values)
}
if node.children == nil { // No more match if leaf node reached
break
} }
} }
switch len(matches) { return CompositeMatchesReverse(matches)
case 0:
return nil
case 1:
return matches[0]
default:
result := []uint32{}
for idx := range matches {
// Insert reversely, the subdomain that matches further ranks higher
result = append(result, matches[len(matches)-1-idx]...)
}
return result
}
} }
// MatchAny implements MatcherGroup.MatchAny. // MatchAny implements MatcherGroup.MatchAny.
func (g *DomainMatcherGroup) MatchAny(domain string) bool { func (g *DomainMatcherGroup) MatchAny(input string) bool {
return len(g.Match(domain)) > 0 node := g.root
for i := len(input); i > 0; {
for j := i - 1; ; j-- {
if input[j] == '.' {
node = node.children[input[j+1:i]]
i = j
break
}
if j == 0 {
node = node.children[input[j:i]]
i = j
break
}
}
if node == nil {
return false
}
if len(node.values) > 0 {
return true
}
if node.children == nil {
return false
}
}
return false
} }

View File

@ -82,7 +82,7 @@ func TestDomainMatcherGroup(t *testing.T) {
Result: []uint32{4, 6}, Result: []uint32{4, 6},
}, },
} }
g := new(DomainMatcherGroup) g := NewDomainMatcherGroup()
for _, pattern := range patterns { for _, pattern := range patterns {
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value) AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
} }
@ -95,7 +95,7 @@ func TestDomainMatcherGroup(t *testing.T) {
} }
func TestEmptyDomainMatcherGroup(t *testing.T) { func TestEmptyDomainMatcherGroup(t *testing.T) {
g := new(DomainMatcherGroup) g := NewDomainMatcherGroup()
r := g.Match("v2fly.org") r := g.Match("v2fly.org")
if len(r) != 0 { if len(r) != 0 {
t.Error("Expect [], but ", r) t.Error("Expect [], but ", r)

View File

@ -6,25 +6,25 @@ type FullMatcherGroup struct {
matchers map[string][]uint32 matchers map[string][]uint32
} }
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher. func NewFullMatcherGroup() *FullMatcherGroup {
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) { return &FullMatcherGroup{
if g.matchers == nil { matchers: make(map[string][]uint32),
g.matchers = make(map[string][]uint32) }
} }
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
domain := matcher.Pattern() domain := matcher.Pattern()
g.matchers[domain] = append(g.matchers[domain], value) g.matchers[domain] = append(g.matchers[domain], value)
} }
// Match implements MatcherGroup.Match. // Match implements MatcherGroup.Match.
func (g *FullMatcherGroup) Match(input string) []uint32 { func (g *FullMatcherGroup) Match(input string) []uint32 {
if g.matchers == nil {
return nil
}
return g.matchers[input] return g.matchers[input]
} }
// MatchAny implements MatcherGroup.Any. // MatchAny implements MatcherGroup.Any.
func (g *FullMatcherGroup) MatchAny(input string) bool { func (g *FullMatcherGroup) MatchAny(input string) bool {
return len(g.Match(input)) > 0 _, found := g.matchers[input]
return found
} }

View File

@ -50,7 +50,7 @@ func TestFullMatcherGroup(t *testing.T) {
Result: []uint32{4, 6}, Result: []uint32{4, 6},
}, },
} }
g := new(FullMatcherGroup) g := NewFullMatcherGroup()
for _, pattern := range patterns { for _, pattern := range patterns {
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value) AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
} }
@ -63,7 +63,7 @@ func TestFullMatcherGroup(t *testing.T) {
} }
func TestEmptyFullMatcherGroup(t *testing.T) { func TestEmptyFullMatcherGroup(t *testing.T) {
g := new(FullMatcherGroup) g := NewFullMatcherGroup()
r := g.Match("v2fly.org") r := g.Match("v2fly.org")
if len(r) != 0 { if len(r) != 0 {
t.Error("Expect [], but ", r) t.Error("Expect [], but ", r)

View File

@ -152,7 +152,7 @@ func (g *MphMatcherGroup) Lookup(rollingHash uint32, input string) uint32 {
// Match implements MatcherGroup.Match. // Match implements MatcherGroup.Match.
func (g *MphMatcherGroup) Match(input string) []uint32 { func (g *MphMatcherGroup) Match(input string) []uint32 {
matches := [][]uint32{} matches := make([][]uint32, 0, 5)
hash := uint32(0) hash := uint32(0)
for i := len(input) - 1; i >= 0; i-- { for i := len(input) - 1; i >= 0; i-- {
hash = hash*PrimeRK + uint32(input[i]) hash = hash*PrimeRK + uint32(input[i])
@ -165,18 +165,7 @@ func (g *MphMatcherGroup) Match(input string) []uint32 {
if mphIdx := g.Lookup(hash, input); mphIdx != 0 { if mphIdx := g.Lookup(hash, input); mphIdx != 0 {
matches = append(matches, g.values[mphIdx]) matches = append(matches, g.values[mphIdx])
} }
switch len(matches) { return CompositeMatchesReverse(matches)
case 0:
return nil
case 1:
return matches[0]
default:
result := []uint32{}
for i := len(matches) - 1; i >= 0; i-- {
result = append(result, matches[i]...)
}
return result
}
} }
// MatchAny implements MatcherGroup.MatchAny. // MatchAny implements MatcherGroup.MatchAny.

View File

@ -32,5 +32,10 @@ func (g *SimpleMatcherGroup) Match(input string) []uint32 {
// MatchAny implements MatcherGroup.MatchAny. // MatchAny implements MatcherGroup.MatchAny.
func (g *SimpleMatcherGroup) MatchAny(input string) bool { func (g *SimpleMatcherGroup) MatchAny(input string) bool {
return len(g.Match(input)) > 0 for _, e := range g.matchers {
if e.matcher.Match(input) {
return true
}
}
return false
} }

View File

@ -20,16 +20,30 @@ func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint3
// Match implements MatcherGroup.Match. // Match implements MatcherGroup.Match.
func (g *SubstrMatcherGroup) Match(input string) []uint32 { func (g *SubstrMatcherGroup) Match(input string) []uint32 {
result := []uint32{} var result []uint32
for i, pattern := range g.patterns { for i, pattern := range g.patterns {
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) { for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit) result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
} }
} }
// sort.Slice will trigger allocation no matter what input is. See https://github.com/golang/go/issues/17332
// We optimize the sorting by length to prevent memory allocation as possible.
switch len(result) {
case 0:
return nil
case 1:
// No need to sort
case 2:
// Do a simple swap if unsorted
if result[0] > result[1] {
result[0], result[1] = result[1], result[0]
}
default:
// Sort the match results in dictionary order, so that: // Sort the match results in dictionary order, so that:
// 1. Pattern matched at smaller position (meaning matched further) takes precedence. // 1. Pattern matched at smaller position (meaning matched further) takes precedence.
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence. // 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] }) sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
}
for i, entry := range result { for i, entry := range result {
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit) result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
} }

View File

@ -4,6 +4,7 @@ import (
"errors" "errors"
"regexp" "regexp"
"strings" "strings"
"unicode/utf8"
) )
// FullMatcher is an implementation of Matcher. // FullMatcher is an implementation of Matcher.
@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) {
case Substr: case Substr:
return SubstrMatcher(pattern), nil return SubstrMatcher(pattern), nil
case Domain: case Domain:
pattern, err := ToDomain(pattern)
if err != nil {
return nil, err
}
return DomainMatcher(pattern), nil return DomainMatcher(pattern), nil
case Regex: // 1. regex matching is case-sensitive case Regex: // 1. regex matching is case-sensitive
regex, err := regexp.Compile(pattern) regex, err := regexp.Compile(pattern)
@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) {
} }
return &RegexMatcher{pattern: regex}, nil return &RegexMatcher{pattern: regex}, nil
default: default:
panic("Unknown type") return nil, errors.New("unknown matcher type")
} }
} }
// NewDomainPattern creates a new Matcher based on the given domain pattern.
// It works like `Type.New`, except that Full, Substr and Domain patterns are first
// converted with ToDomain, and the conversion error is returned if there is one.
// Regex patterns are compiled unchanged. Any other type yields an error.
func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
	if t == Regex { // Regex's charset not in LDH subset
		compiled, err := regexp.Compile(pattern)
		if err != nil {
			return nil, err
		}
		return &RegexMatcher{pattern: compiled}, nil
	}
	if t != Full && t != Substr && t != Domain {
		return nil, errors.New("unknown matcher type")
	}
	// The three remaining types share the same validation and conversion step.
	domain, err := ToDomain(pattern)
	if err != nil {
		return nil, err
	}
	switch t {
	case Full:
		return FullMatcher(domain), nil
	case Substr:
		return SubstrMatcher(domain), nil
	default:
		return DomainMatcher(domain), nil
	}
}
// ToDomain converts the input pattern to a domain string, or returns an error when that is not possible.
//  1. The pattern must conform to the Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
//     * Letters A to Z (uppercase and lowercase are not distinguished; uppercase is converted to lowercase)
//     * Digits 0 to 9
//     * Hyphens(-) and Periods(.)
//  2. Non-ASCII characters are not supported for now.
//     * Internationalized domain names may be converted to Punycode in the future if needed.
func ToDomain(pattern string) (string, error) {
	var out strings.Builder
	out.Grow(len(pattern))
	for idx := 0; idx < len(pattern); idx++ {
		ch := pattern[idx]
		if ch >= utf8.RuneSelf {
			return "", errors.New("non-ASCII characters not supported for now")
		}
		if 'A' <= ch && ch <= 'Z' {
			ch += 'a' - 'A' // fold to lowercase
		} else {
			allowed := ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') || ch == '-' || ch == '.'
			if !allowed {
				return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
			}
		}
		out.WriteByte(ch)
	}
	return out.String(), nil
}
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers. // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
type MatcherGroupForAll interface { type MatcherGroupForAll interface {
AddMatcher(matcher Matcher, value uint32) AddMatcher(matcher Matcher, value uint32)
@ -137,6 +205,10 @@ type MatcherGroupForRegex interface {
// It returns error if the MatcherGroup does not accept the provided Matcher's type. // It returns error if the MatcherGroup does not accept the provided Matcher's type.
// This function is provided to help writing code to test a MatcherGroup. // This function is provided to help writing code to test a MatcherGroup.
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error { func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
if g, ok := g.(IndexMatcher); ok {
g.Add(matcher)
return nil
}
if g, ok := g.(MatcherGroupForAll); ok { if g, ok := g.(MatcherGroupForAll); ok {
g.AddMatcher(matcher, value) g.AddMatcher(matcher, value)
return nil return nil
@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
} }
return errors.New("cannot add matcher to matcher group") return errors.New("cannot add matcher to matcher group")
} }
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
// Groups are concatenated in the order given. It is designed to avoid new memory
// allocation as far as possible:
//  1. With no group, nil is returned.
//  2. With a single group, that group is returned as is, so the result shares
//     its backing array with the group.
//  3. Otherwise the result is allocated once, sized to hold every index exactly.
func CompositeMatches(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	default:
		// Size the result up front so append never has to grow it.
		total := 0
		for _, group := range matches {
			total += len(group)
		}
		result := make([]uint32, 0, total)
		for _, group := range matches {
			result = append(result, group...)
		}
		return result
	}
}
// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice.
// It is designed that:
//  1. All groups are concatenated in reverse order, so the group appended last
//     (the matcher that matches further) ranks higher.
//  2. Indices within the same group keep their original order.
//  3. New memory allocation is avoided as far as possible: no group yields nil,
//     a single group is returned as is (sharing its backing array), and
//     otherwise the result is allocated once, sized to hold every index exactly.
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	default:
		// Size the result up front so append never has to grow it.
		total := 0
		for _, group := range matches {
			total += len(group)
		}
		result := make([]uint32, 0, total)
		for i := len(matches) - 1; i >= 0; i-- {
			result = append(result, matches[i]...)
		}
		return result
	}
}