From f494df2567c751011ee3d8bedacde12ec5e14639 Mon Sep 17 00:00:00 2001 From: Vigilans Date: Fri, 16 Sep 2022 14:40:03 +0800 Subject: [PATCH] feat: Implement Match and MatchAny for all MatcherGroup, IndexMatcher [common/strmatcher] Implement Match and MatchAny for all MatcherGroup and IndexMatcher --- .../strmatcher/benchmark_indexmatcher_test.go | 58 +++++++ common/strmatcher/benchmark_matchers_test.go | 149 ++++++++++++++++ common/strmatcher/benchmark_test.go | 161 ------------------ common/strmatcher/indexmatcher_linear.go | 71 ++++++-- common/strmatcher/indexmatcher_mph.go | 33 +++- common/strmatcher/indexmatcher_mph_test.go | 94 ++++++++++ .../strmatcher/matchergroup_ac_automation.go | 18 +- common/strmatcher/matchergroup_domain.go | 150 ++++++++-------- common/strmatcher/matchergroup_domain_test.go | 4 +- common/strmatcher/matchergroup_full.go | 16 +- common/strmatcher/matchergroup_full_test.go | 4 +- common/strmatcher/matchergroup_mph.go | 15 +- common/strmatcher/matchergroup_simple.go | 7 +- common/strmatcher/matchergroup_substr.go | 24 ++- common/strmatcher/matchers.go | 111 +++++++++++- 15 files changed, 612 insertions(+), 303 deletions(-) create mode 100644 common/strmatcher/benchmark_indexmatcher_test.go create mode 100644 common/strmatcher/benchmark_matchers_test.go delete mode 100644 common/strmatcher/benchmark_test.go create mode 100644 common/strmatcher/indexmatcher_mph_test.go diff --git a/common/strmatcher/benchmark_indexmatcher_test.go b/common/strmatcher/benchmark_indexmatcher_test.go new file mode 100644 index 000000000..8ca1a223b --- /dev/null +++ b/common/strmatcher/benchmark_indexmatcher_test.go @@ -0,0 +1,58 @@ +package strmatcher_test + +import ( + "testing" + + . 
"github.com/v2fly/v2ray-core/v5/common/strmatcher" +) + +func BenchmarkLinearIndexMatcher(b *testing.B) { + benchmarkIndexMatcher(b, func() IndexMatcher { + return NewLinearIndexMatcher() + }) +} + +func BenchmarkMphIndexMatcher(b *testing.B) { + benchmarkIndexMatcher(b, func() IndexMatcher { + return NewMphIndexMatcher() + }) +} + +func benchmarkIndexMatcher(b *testing.B, ctor func() IndexMatcher) { + b.Run("Match", func(b *testing.B) { + b.Run("Domain------------", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: true}) + }) + b.Run("Domain+Full-------", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true}) + }) + b.Run("Domain+Full+Substr", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true}) + }) + b.Run("All-Fail----------", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false}) + }) + }) + b.Run("Match/Dotless", func(b *testing.B) { // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used. 
+ b.Run("All-Succ", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: true, Full: true, Substr: true, Regex: true}) + }) + b.Run("All-Fail", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{Domain: false, Full: false, Substr: false, Regex: false}) + }) + }) + b.Run("MatchAny", func(b *testing.B) { + b.Run("First-Full--", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{Full: true, Domain: true, Substr: true}) + }) + b.Run("First-Domain", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: true, Substr: true}) + }) + b.Run("First-Substr", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: true}) + }) + b.Run("All-Fail----", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{Full: false, Domain: false, Substr: false}) + }) + }) +} diff --git a/common/strmatcher/benchmark_matchers_test.go b/common/strmatcher/benchmark_matchers_test.go new file mode 100644 index 000000000..ac739b185 --- /dev/null +++ b/common/strmatcher/benchmark_matchers_test.go @@ -0,0 +1,149 @@ +package strmatcher_test + +import ( + "strconv" + "testing" + + "github.com/v2fly/v2ray-core/v5/common" + . 
"github.com/v2fly/v2ray-core/v5/common/strmatcher" +) + +func BenchmarkFullMatcher(b *testing.B) { + b.Run("SimpleMatcherGroup------", func(b *testing.B) { + benchmarkMatcherType(b, Full, func() MatcherGroup { + return new(SimpleMatcherGroup) + }) + }) + b.Run("FullMatcherGroup--------", func(b *testing.B) { + benchmarkMatcherType(b, Full, func() MatcherGroup { + return NewFullMatcherGroup() + }) + }) + b.Run("ACAutomationMatcherGroup", func(b *testing.B) { + benchmarkMatcherType(b, Full, func() MatcherGroup { + return NewACAutomatonMatcherGroup() + }) + }) + b.Run("MphMatcherGroup---------", func(b *testing.B) { + benchmarkMatcherType(b, Full, func() MatcherGroup { + return NewMphMatcherGroup() + }) + }) +} + +func BenchmarkDomainMatcher(b *testing.B) { + b.Run("SimpleMatcherGroup------", func(b *testing.B) { + benchmarkMatcherType(b, Domain, func() MatcherGroup { + return new(SimpleMatcherGroup) + }) + }) + b.Run("DomainMatcherGroup------", func(b *testing.B) { + benchmarkMatcherType(b, Domain, func() MatcherGroup { + return NewDomainMatcherGroup() + }) + }) + b.Run("ACAutomationMatcherGroup", func(b *testing.B) { + benchmarkMatcherType(b, Domain, func() MatcherGroup { + return NewACAutomatonMatcherGroup() + }) + }) + b.Run("MphMatcherGroup---------", func(b *testing.B) { + benchmarkMatcherType(b, Domain, func() MatcherGroup { + return NewMphMatcherGroup() + }) + }) +} + +func BenchmarkSubstrMatcher(b *testing.B) { + b.Run("SimpleMatcherGroup------", func(b *testing.B) { + benchmarkMatcherType(b, Substr, func() MatcherGroup { + return new(SimpleMatcherGroup) + }) + }) + b.Run("SubstrMatcherGroup------", func(b *testing.B) { + benchmarkMatcherType(b, Substr, func() MatcherGroup { + return new(SubstrMatcherGroup) + }) + }) + b.Run("ACAutomationMatcherGroup", func(b *testing.B) { + benchmarkMatcherType(b, Substr, func() MatcherGroup { + return NewACAutomatonMatcherGroup() + }) + }) +} + +// Utility functions for benchmark + +func benchmarkMatcherType(b *testing.B, t 
Type, ctor func() MatcherGroup) { + b.Run("Match", func(b *testing.B) { + b.Run("Succ", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{t: true}) + }) + b.Run("Fail", func(b *testing.B) { + benchmarkMatch(b, ctor(), map[Type]bool{t: false}) + }) + }) + b.Run("MatchAny", func(b *testing.B) { + b.Run("Succ", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{t: true}) + }) + b.Run("Fail", func(b *testing.B) { + benchmarkMatchAny(b, ctor(), map[Type]bool{t: false}) + }) + }) +} + +func benchmarkMatch(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) { + prepareMatchers(g, enabledTypes) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = g.Match("0.v2fly.org") + } +} + +func benchmarkMatchAny(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) { + prepareMatchers(g, enabledTypes) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = g.MatchAny("0.v2fly.org") + } +} + +func prepareMatchers(g MatcherGroup, enabledTypes map[Type]bool) { + for matcherType, hasMatch := range enabledTypes { + switch matcherType { + case Domain: + if hasMatch { + AddMatcherToGroup(g, DomainMatcher("v2fly.org"), 0) + } + for i := 1; i < 1024; i++ { + AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + case Full: + if hasMatch { + AddMatcherToGroup(g, FullMatcher("0.v2fly.org"), 0) + } + for i := 1; i < 64; i++ { + AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + case Substr: + if hasMatch { + AddMatcherToGroup(g, SubstrMatcher("v2fly.org"), 0) + } + for i := 1; i < 4; i++ { + AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + case Regex: + matcher, err := Regex.New("^[^.]*$") // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used. 
+ common.Must(err) + AddMatcherToGroup(g, matcher, 0) + } + } + if g, ok := g.(buildable); ok { + common.Must(g.Build()) + } +} + +type buildable interface { + Build() error +} diff --git a/common/strmatcher/benchmark_test.go b/common/strmatcher/benchmark_test.go deleted file mode 100644 index 0d1ffefb9..000000000 --- a/common/strmatcher/benchmark_test.go +++ /dev/null @@ -1,161 +0,0 @@ -package strmatcher_test - -import ( - "strconv" - "testing" - - "github.com/v2fly/v2ray-core/v5/common" - . "github.com/v2fly/v2ray-core/v5/common/strmatcher" -) - -// Benchmark Domain Matcher Groups - -func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) { - g := new(SimpleMatcherGroup) - - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } -} - -func BenchmarkDomainMatcherGroup(b *testing.B) { - g := new(DomainMatcherGroup) - - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } -} - -func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) { - ac := NewACAutomatonMatcherGroup() - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - ac.Build() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = ac.MatchAny("0.v2fly.org") - } -} - -func BenchmarkMphMatcherGroupForDomain(b *testing.B) { - mph := NewMphMatcherGroup() - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - mph.Build() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = mph.MatchAny("0.v2fly.org") - } -} - -// Benchmark Full Matcher Groups - -func BenchmarkSimpleMatcherGroupForFull(b *testing.B) { - g := new(SimpleMatcherGroup) - - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(g, 
FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } -} - -func BenchmarkFullMatcherGroup(b *testing.B) { - g := new(FullMatcherGroup) - - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } -} - -func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) { - ac := NewACAutomatonMatcherGroup() - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - ac.Build() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = ac.MatchAny("0.v2fly.org") - } -} - -func BenchmarkMphMatcherGroupFull(b *testing.B) { - mph := NewMphMatcherGroup() - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - mph.Build() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = mph.MatchAny("0.v2fly.org") - } -} - -// Benchmark Substr Matcher Groups - -func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) { - g := new(SimpleMatcherGroup) - - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } -} - -func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) { - ac := NewACAutomatonMatcherGroup() - for i := 1; i <= 1024; i++ { - AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) - } - ac.Build() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = ac.MatchAny("0.v2fly.org") - } -} - -// Benchmark Index Matchers - -func BenchmarkLinearIndexMatcher(b *testing.B) { - g := new(LinearIndexMatcher) - for i := 1; i <= 1024; i++ { - m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org") - common.Must(err) - g.Add(m) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = g.Match("0.v2fly.org") - } 
-} diff --git a/common/strmatcher/indexmatcher_linear.go b/common/strmatcher/indexmatcher_linear.go index da599416e..dcdc1d336 100644 --- a/common/strmatcher/indexmatcher_linear.go +++ b/common/strmatcher/indexmatcher_linear.go @@ -1,13 +1,12 @@ package strmatcher // LinearIndexMatcher is an implementation of IndexMatcher. -// Empty initialization works. type LinearIndexMatcher struct { - count uint32 - fullMatcher FullMatcherGroup - domainMatcher DomainMatcherGroup - substrMatcher SubstrMatcherGroup - otherMatchers SimpleMatcherGroup + count uint32 + full *FullMatcherGroup + domain *DomainMatcherGroup + substr *SubstrMatcherGroup + regex *SimpleMatcherGroup } func NewLinearIndexMatcher() *LinearIndexMatcher { @@ -21,13 +20,25 @@ func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 { switch matcher := matcher.(type) { case FullMatcher: - g.fullMatcher.AddFullMatcher(matcher, index) + if g.full == nil { + g.full = NewFullMatcherGroup() + } + g.full.AddFullMatcher(matcher, index) case DomainMatcher: - g.domainMatcher.AddDomainMatcher(matcher, index) + if g.domain == nil { + g.domain = NewDomainMatcherGroup() + } + g.domain.AddDomainMatcher(matcher, index) case SubstrMatcher: - g.substrMatcher.AddSubstrMatcher(matcher, index) + if g.substr == nil { + g.substr = new(SubstrMatcherGroup) + } + g.substr.AddSubstrMatcher(matcher, index) default: - g.otherMatchers.AddMatcher(matcher, index) + if g.regex == nil { + g.regex = new(SimpleMatcherGroup) + } + g.regex.AddMatcher(matcher, index) } return index @@ -40,17 +51,43 @@ func (*LinearIndexMatcher) Build() error { // Match implements IndexMatcher.Match. func (g *LinearIndexMatcher) Match(input string) []uint32 { - result := []uint32{} - result = append(result, g.fullMatcher.Match(input)...) - result = append(result, g.domainMatcher.Match(input)...) - result = append(result, g.substrMatcher.Match(input)...) - result = append(result, g.otherMatchers.Match(input)...) 
- return result + // Allocate capacity to prevent matches escaping to heap + result := make([][]uint32, 0, 5) + if g.full != nil { + if matches := g.full.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + if g.domain != nil { + if matches := g.domain.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + if g.substr != nil { + if matches := g.substr.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + if g.regex != nil { + if matches := g.regex.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + return CompositeMatches(result) } // MatchAny implements IndexMatcher.MatchAny. func (g *LinearIndexMatcher) MatchAny(input string) bool { - return len(g.Match(input)) > 0 + if g.full != nil && g.full.MatchAny(input) { + return true + } + if g.domain != nil && g.domain.MatchAny(input) { + return true + } + if g.substr != nil && g.substr.MatchAny(input) { + return true + } + return g.regex != nil && g.regex.MatchAny(input) } // Size implements IndexMatcher.Size. diff --git a/common/strmatcher/indexmatcher_mph.go b/common/strmatcher/indexmatcher_mph.go index 796148065..cb4c11c62 100644 --- a/common/strmatcher/indexmatcher_mph.go +++ b/common/strmatcher/indexmatcher_mph.go @@ -8,15 +8,11 @@ type MphIndexMatcher struct { count uint32 mph *MphMatcherGroup ac *ACAutomatonMatcherGroup - regex SimpleMatcherGroup + regex *SimpleMatcherGroup } func NewMphIndexMatcher() *MphIndexMatcher { - return &MphIndexMatcher{ - mph: nil, - ac: nil, - regex: SimpleMatcherGroup{}, - } + return new(MphIndexMatcher) } // Add implements IndexMatcher.Add. @@ -41,6 +37,9 @@ func (g *MphIndexMatcher) Add(matcher Matcher) uint32 { } g.ac.AddSubstrMatcher(matcher, index) case *RegexMatcher: + if g.regex == nil { + g.regex = &SimpleMatcherGroup{} + } g.regex.AddMatcher(matcher, index) } @@ -59,8 +58,24 @@ func (g *MphIndexMatcher) Build() error { } // Match implements IndexMatcher.Match. 
-func (*MphIndexMatcher) Match(string) []uint32 { - return nil +func (g *MphIndexMatcher) Match(input string) []uint32 { + result := make([][]uint32, 0, 5) + if g.mph != nil { + if matches := g.mph.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + if g.ac != nil { + if matches := g.ac.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + if g.regex != nil { + if matches := g.regex.Match(input); len(matches) > 0 { + result = append(result, matches) + } + } + return CompositeMatches(result) } // MatchAny implements IndexMatcher.MatchAny. @@ -71,7 +86,7 @@ func (g *MphIndexMatcher) MatchAny(input string) bool { if g.ac != nil && g.ac.MatchAny(input) { return true } - return g.regex.MatchAny(input) + return g.regex != nil && g.regex.MatchAny(input) } // Size implements IndexMatcher.Size. diff --git a/common/strmatcher/indexmatcher_mph_test.go b/common/strmatcher/indexmatcher_mph_test.go new file mode 100644 index 000000000..87c0790ca --- /dev/null +++ b/common/strmatcher/indexmatcher_mph_test.go @@ -0,0 +1,94 @@ +package strmatcher_test + +import ( + "reflect" + "testing" + + "github.com/v2fly/v2ray-core/v5/common" + . 
"github.com/v2fly/v2ray-core/v5/common/strmatcher" +) + +func TestMphIndexMatcher(t *testing.T) { + rules := []struct { + Type Type + Domain string + }{ + { + Type: Regex, + Domain: "apis\\.us$", + }, + { + Type: Substr, + Domain: "apis", + }, + { + Type: Domain, + Domain: "googleapis.com", + }, + { + Type: Domain, + Domain: "com", + }, + { + Type: Full, + Domain: "www.baidu.com", + }, + { + Type: Substr, + Domain: "apis", + }, + { + Type: Domain, + Domain: "googleapis.com", + }, + { + Type: Full, + Domain: "fonts.googleapis.com", + }, + { + Type: Full, + Domain: "www.baidu.com", + }, + { + Type: Domain, + Domain: "example.com", + }, + } + cases := []struct { + Input string + Output []uint32 + }{ + { + Input: "www.baidu.com", + Output: []uint32{5, 9, 4}, + }, + { + Input: "fonts.googleapis.com", + Output: []uint32{8, 3, 7, 4, 2, 6}, + }, + { + Input: "example.googleapis.com", + Output: []uint32{3, 7, 4, 2, 6}, + }, + { + Input: "testapis.us", + Output: []uint32{2, 6, 1}, + }, + { + Input: "example.com", + Output: []uint32{10, 4}, + }, + } + matcherGroup := NewMphIndexMatcher() + for _, rule := range rules { + matcher, err := rule.Type.New(rule.Domain) + common.Must(err) + matcherGroup.Add(matcher) + } + matcherGroup.Build() + for _, test := range cases { + if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) { + t.Error("unexpected output: ", m, " for test case ", test) + } + } +} diff --git a/common/strmatcher/matchergroup_ac_automation.go b/common/strmatcher/matchergroup_ac_automation.go index 1f8e2de1e..dae021daf 100644 --- a/common/strmatcher/matchergroup_ac_automation.go +++ b/common/strmatcher/matchergroup_ac_automation.go @@ -127,8 +127,8 @@ func (ac *ACAutomatonMatcherGroup) Build() error { // Match implements MatcherGroup.Match. 
func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 { - var suffixMatches [][]uint32 - var substrMatches [][]uint32 + suffixMatches := make([][]uint32, 0, 5) + substrMatches := make([][]uint32, 0, 5) fullMatch := true // fullMatch indicates no fail edge traversed so far. node := &ac.nodes[0] // start from root node. // 1. the match string is all through trie edge. FULL MATCH or DOMAIN @@ -177,18 +177,10 @@ func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 { suffixMatches = append(suffixMatches, values[Full]) } } - switch matches := append(substrMatches, suffixMatches...); len(matches) { // nolint: gocritic - case 0: - return nil - case 1: - return matches[0] - default: - result := []uint32{} - for i := len(matches) - 1; i >= 0; i-- { - result = append(result, matches[i]...) - } - return result + if len(substrMatches) == 0 { + return CompositeMatchesReverse(suffixMatches) } + return CompositeMatchesReverse(append(substrMatches, suffixMatches...)) } // MatchAny implements MatcherGroup.MatchAny. diff --git a/common/strmatcher/matchergroup_domain.go b/common/strmatcher/matchergroup_domain.go index bf8db2d6c..293342149 100644 --- a/common/strmatcher/matchergroup_domain.go +++ b/common/strmatcher/matchergroup_domain.go @@ -1,101 +1,109 @@ package strmatcher -import "strings" - -func breakDomain(domain string) []string { - return strings.Split(domain, ".") -} - -type node struct { - values []uint32 - sub map[string]*node +type trieNode struct { + values []uint32 + children map[string]*trieNode } // DomainMatcherGroup is an implementation of MatcherGroup. // It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based. type DomainMatcherGroup struct { - root *node + root *trieNode +} + +func NewDomainMatcherGroup() *DomainMatcherGroup { + return &DomainMatcherGroup{ + root: new(trieNode), + } } // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher. 
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) { - if g.root == nil { - g.root = new(node) - } - - current := g.root - parts := breakDomain(matcher.Pattern()) - for i := len(parts) - 1; i >= 0; i-- { - part := parts[i] - if current.sub == nil { - current.sub = make(map[string]*node) + node := g.root + pattern := matcher.Pattern() + for i := len(pattern); i > 0; { + var part string + for j := i - 1; ; j-- { + if pattern[j] == '.' { + part = pattern[j+1 : i] + i = j + break + } + if j == 0 { + part = pattern[j:i] + i = j + break + } } - next := current.sub[part] + if node.children == nil { + node.children = make(map[string]*trieNode) + } + next := node.children[part] if next == nil { - next = new(node) - current.sub[part] = next + next = new(trieNode) + node.children[part] = next } - current = next + node = next } - current.values = append(current.values, value) + node.values = append(node.values, value) } // Match implements MatcherGroup.Match. -func (g *DomainMatcherGroup) Match(domain string) []uint32 { - if domain == "" { - return nil - } - - current := g.root - if current == nil { - return nil - } - - nextPart := func(idx int) int { - for i := idx - 1; i >= 0; i-- { - if domain[i] == '.' { - return i +func (g *DomainMatcherGroup) Match(input string) []uint32 { + matches := make([][]uint32, 0, 5) + node := g.root + for i := len(input); i > 0; { + for j := i - 1; ; j-- { + if input[j] == '.' 
{ // Domain label found + node = node.children[input[j+1:i]] + i = j + break + } + if j == 0 { // The last part of domain label + node = node.children[input[j:i]] + i = j + break } } - return -1 - } - - matches := [][]uint32{} - idx := len(domain) - for { - if idx == -1 || current.sub == nil { + if node == nil { // No more match if no trie edge transition break } - - nidx := nextPart(idx) - part := domain[nidx+1 : idx] - next := current.sub[part] - if next == nil { + if len(node.values) > 0 { // Found matched matchers + matches = append(matches, node.values) + } + if node.children == nil { // No more match if leaf node reached break } - current = next - idx = nidx - if len(current.values) > 0 { - matches = append(matches, current.values) - } - } - switch len(matches) { - case 0: - return nil - case 1: - return matches[0] - default: - result := []uint32{} - for idx := range matches { - // Insert reversely, the subdomain that matches further ranks higher - result = append(result, matches[len(matches)-1-idx]...) - } - return result } + return CompositeMatchesReverse(matches) } // MatchAny implements MatcherGroup.MatchAny. -func (g *DomainMatcherGroup) MatchAny(domain string) bool { - return len(g.Match(domain)) > 0 +func (g *DomainMatcherGroup) MatchAny(input string) bool { + node := g.root + for i := len(input); i > 0; { + for j := i - 1; ; j-- { + if input[j] == '.' 
{ + node = node.children[input[j+1:i]] + i = j + break + } + if j == 0 { + node = node.children[input[j:i]] + i = j + break + } + } + if node == nil { + return false + } + if len(node.values) > 0 { + return true + } + if node.children == nil { + return false + } + } + return false } diff --git a/common/strmatcher/matchergroup_domain_test.go b/common/strmatcher/matchergroup_domain_test.go index 73934dd33..e419a86f5 100644 --- a/common/strmatcher/matchergroup_domain_test.go +++ b/common/strmatcher/matchergroup_domain_test.go @@ -82,7 +82,7 @@ func TestDomainMatcherGroup(t *testing.T) { Result: []uint32{4, 6}, }, } - g := new(DomainMatcherGroup) + g := NewDomainMatcherGroup() for _, pattern := range patterns { AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value) } @@ -95,7 +95,7 @@ func TestDomainMatcherGroup(t *testing.T) { } func TestEmptyDomainMatcherGroup(t *testing.T) { - g := new(DomainMatcherGroup) + g := NewDomainMatcherGroup() r := g.Match("v2fly.org") if len(r) != 0 { t.Error("Expect [], but ", r) diff --git a/common/strmatcher/matchergroup_full.go b/common/strmatcher/matchergroup_full.go index 794772945..85057b36f 100644 --- a/common/strmatcher/matchergroup_full.go +++ b/common/strmatcher/matchergroup_full.go @@ -6,25 +6,25 @@ type FullMatcherGroup struct { matchers map[string][]uint32 } +func NewFullMatcherGroup() *FullMatcherGroup { + return &FullMatcherGroup{ + matchers: make(map[string][]uint32), + } +} + // AddFullMatcher implements MatcherGroupForFull.AddFullMatcher. func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) { - if g.matchers == nil { - g.matchers = make(map[string][]uint32) - } - domain := matcher.Pattern() g.matchers[domain] = append(g.matchers[domain], value) } // Match implements MatcherGroup.Match. func (g *FullMatcherGroup) Match(input string) []uint32 { - if g.matchers == nil { - return nil - } return g.matchers[input] } // MatchAny implements MatcherGroup.Any. 
func (g *FullMatcherGroup) MatchAny(input string) bool { - return len(g.Match(input)) > 0 + _, found := g.matchers[input] + return found } diff --git a/common/strmatcher/matchergroup_full_test.go b/common/strmatcher/matchergroup_full_test.go index 645cb349c..7d733579f 100644 --- a/common/strmatcher/matchergroup_full_test.go +++ b/common/strmatcher/matchergroup_full_test.go @@ -50,7 +50,7 @@ func TestFullMatcherGroup(t *testing.T) { Result: []uint32{4, 6}, }, } - g := new(FullMatcherGroup) + g := NewFullMatcherGroup() for _, pattern := range patterns { AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value) } @@ -63,7 +63,7 @@ func TestFullMatcherGroup(t *testing.T) { } func TestEmptyFullMatcherGroup(t *testing.T) { - g := new(FullMatcherGroup) + g := NewFullMatcherGroup() r := g.Match("v2fly.org") if len(r) != 0 { t.Error("Expect [], but ", r) diff --git a/common/strmatcher/matchergroup_mph.go b/common/strmatcher/matchergroup_mph.go index d842e4486..f5afa5dc4 100644 --- a/common/strmatcher/matchergroup_mph.go +++ b/common/strmatcher/matchergroup_mph.go @@ -152,7 +152,7 @@ func (g *MphMatcherGroup) Lookup(rollingHash uint32, input string) uint32 { // Match implements MatcherGroup.Match. func (g *MphMatcherGroup) Match(input string) []uint32 { - matches := [][]uint32{} + matches := make([][]uint32, 0, 5) hash := uint32(0) for i := len(input) - 1; i >= 0; i-- { hash = hash*PrimeRK + uint32(input[i]) @@ -165,18 +165,7 @@ func (g *MphMatcherGroup) Match(input string) []uint32 { if mphIdx := g.Lookup(hash, input); mphIdx != 0 { matches = append(matches, g.values[mphIdx]) } - switch len(matches) { - case 0: - return nil - case 1: - return matches[0] - default: - result := []uint32{} - for i := len(matches) - 1; i >= 0; i-- { - result = append(result, matches[i]...) - } - return result - } + return CompositeMatchesReverse(matches) } // MatchAny implements MatcherGroup.MatchAny. 
diff --git a/common/strmatcher/matchergroup_simple.go b/common/strmatcher/matchergroup_simple.go index 7077a274c..fa6f0eb2a 100644 --- a/common/strmatcher/matchergroup_simple.go +++ b/common/strmatcher/matchergroup_simple.go @@ -32,5 +32,10 @@ func (g *SimpleMatcherGroup) Match(input string) []uint32 { // MatchAny implements MatcherGroup.MatchAny. func (g *SimpleMatcherGroup) MatchAny(input string) bool { - return len(g.Match(input)) > 0 + for _, e := range g.matchers { + if e.matcher.Match(input) { + return true + } + } + return false } diff --git a/common/strmatcher/matchergroup_substr.go b/common/strmatcher/matchergroup_substr.go index 1eb3d9692..ccaa0c9ff 100644 --- a/common/strmatcher/matchergroup_substr.go +++ b/common/strmatcher/matchergroup_substr.go @@ -20,16 +20,30 @@ func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint3 // Match implements MatcherGroup.Match. func (g *SubstrMatcherGroup) Match(input string) []uint32 { - result := []uint32{} + var result []uint32 for i, pattern := range g.patterns { for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) { result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit) } } - // Sort the match results in dictionary order, so that: - // 1. Pattern matched at smaller position (meaning matched further) takes precedence. - // 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence. - sort.Slice(result, func(i, j int) bool { return result[i] < result[j] }) + // sort.Slice will trigger allocation no matter what input is. See https://github.com/golang/go/issues/17332 + // We optimize the sorting by length to prevent memory allocation as possible. 
+ switch len(result) { + case 0: + return nil + case 1: + // No need to sort + case 2: + // Do a simple swap if unsorted + if result[0] > result[1] { + result[0], result[1] = result[1], result[0] + } + default: + // Sort the match results in dictionary order, so that: + // 1. Pattern matched at smaller position (meaning matched further) takes precedence. + // 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence. + sort.Slice(result, func(i, j int) bool { return result[i] < result[j] }) + } for i, entry := range result { result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit) } diff --git a/common/strmatcher/matchers.go b/common/strmatcher/matchers.go index c5d6fcdfc..f13756fa4 100644 --- a/common/strmatcher/matchers.go +++ b/common/strmatcher/matchers.go @@ -4,6 +4,7 @@ import ( "errors" "regexp" "strings" + "unicode/utf8" ) // FullMatcher is an implementation of Matcher. @@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) { case Substr: return SubstrMatcher(pattern), nil case Domain: + pattern, err := ToDomain(pattern) + if err != nil { + return nil, err + } return DomainMatcher(pattern), nil case Regex: // 1. regex matching is case-sensitive regex, err := regexp.Compile(pattern) @@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) { } return &RegexMatcher{pattern: regex}, nil default: - panic("Unknown type") + return nil, errors.New("unknown matcher type") } } +// NewDomainPattern creates a new Matcher based on the given domain pattern. +// It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern. 
+func (t Type) NewDomainPattern(pattern string) (Matcher, error) { + switch t { + case Full: + pattern, err := ToDomain(pattern) + if err != nil { + return nil, err + } + return FullMatcher(pattern), nil + case Substr: + pattern, err := ToDomain(pattern) + if err != nil { + return nil, err + } + return SubstrMatcher(pattern), nil + case Domain: + pattern, err := ToDomain(pattern) + if err != nil { + return nil, err + } + return DomainMatcher(pattern), nil + case Regex: // Regex's charset not in LDH subset + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + return &RegexMatcher{pattern: regex}, nil + default: + return nil, errors.New("unknown matcher type") + } +} + +// ToDomain converts input pattern to a domain string, and returns an error if such a conversion cannot be made. +// 1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952): +// * Letters A to Z (no distinction between uppercase and lowercase; uppercase letters are converted to lowercase) +// * Digits 0 to 9 +// * Hyphens(-) and Periods(.) +// 2. Non-ASCII characters are not supported for now. +// * May support Internationalized domain name to Punycode if needed in the future. +func ToDomain(pattern string) (string, error) { + builder := strings.Builder{} + builder.Grow(len(pattern)) + for i := 0; i < len(pattern); i++ { + c := pattern[i] + if c >= utf8.RuneSelf { + return "", errors.New("non-ASCII characters not supported for now") + } + switch { + case 'A' <= c && c <= 'Z': + c += 'a' - 'A' + case 'a' <= c && c <= 'z': + case '0' <= c && c <= '9': + case c == '-': + case c == '.': + default: + return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset") + } + builder.WriteByte(c) + } + return builder.String(), nil +} + // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers. 
type MatcherGroupForAll interface { AddMatcher(matcher Matcher, value uint32) @@ -137,6 +205,10 @@ type MatcherGroupForRegex interface { // It returns error if the MatcherGroup does not accept the provided Matcher's type. // This function is provided to help writing code to test a MatcherGroup. func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error { + if g, ok := g.(IndexMatcher); ok { + g.Add(matcher) + return nil + } if g, ok := g.(MatcherGroupForAll); ok { g.AddMatcher(matcher, value) return nil @@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error { } return errors.New("cannot add matcher to matcher group") } + +// CompositeMatches flattens the matches slice to produce a single matched indices slice. +// It is designed to avoid new memory allocation as much as possible. +func CompositeMatches(matches [][]uint32) []uint32 { + switch len(matches) { + case 0: + return nil + case 1: + return matches[0] + default: + result := make([]uint32, 0, 5) + for i := 0; i < len(matches); i++ { + result = append(result, matches[i]...) + } + return result + } +} + +// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice. +// It is designed that: +// 1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher. +// 2. Indices in the same matcher keep their original order. +// 3. Avoid new memory allocation as much as possible. +func CompositeMatchesReverse(matches [][]uint32) []uint32 { + switch len(matches) { + case 0: + return nil + case 1: + return matches[0] + default: + result := make([]uint32, 0, 5) + for i := len(matches) - 1; i >= 0; i-- { + result = append(result, matches[i]...) + } + return result + } +}