diff --git a/app/dns/dns.go b/app/dns/dns.go index cddc7f834..8e894169e 100644 --- a/app/dns/dns.go +++ b/app/dns/dns.go @@ -98,7 +98,7 @@ func New(ctx context.Context, config *Config) (*DNS, error) { // MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1 matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1) - domainMatcher := &strmatcher.MatcherGroup{} + domainMatcher := &strmatcher.LinearIndexMatcher{} geoipContainer := router.GeoIPMatcherContainer{} for _, endpoint := range config.NameServers { diff --git a/app/dns/hosts.go b/app/dns/hosts.go index 85d46ed79..82a53d684 100644 --- a/app/dns/hosts.go +++ b/app/dns/hosts.go @@ -11,12 +11,12 @@ import ( // StaticHosts represents static domain-ip mapping in DNS server. type StaticHosts struct { ips [][]net.Address - matchers *strmatcher.MatcherGroup + matchers *strmatcher.LinearIndexMatcher } // NewStaticHosts creates a new StaticHosts instance. func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) { - g := new(strmatcher.MatcherGroup) + g := new(strmatcher.LinearIndexMatcher) sh := &StaticHosts{ ips: make([][]net.Address, len(hosts)+len(legacy)+16), matchers: g, diff --git a/app/router/condition.go b/app/router/condition.go index eea61d3bd..30bdf6f05 100644 --- a/app/router/condition.go +++ b/app/router/condition.go @@ -64,44 +64,34 @@ func domainToMatcher(domain *routercommon.Domain) (strmatcher.Matcher, error) { } type DomainMatcher struct { - matchers strmatcher.IndexMatcher + matcher strmatcher.IndexMatcher } -func NewMphMatcherGroup(domains []*routercommon.Domain) (*DomainMatcher, error) { - g := strmatcher.NewMphMatcherGroup() - for _, d := range domains { - matcherType, f := matcherTypeMap[d.Type] - if !f { - return nil, newError("unsupported domain type", d.Type) - } - _, err := g.AddPattern(d.Value, matcherType) +func NewDomainMatcher(matcherType string, domains []*routercommon.Domain) 
(*DomainMatcher, error) { + var indexMatcher strmatcher.IndexMatcher + switch matcherType { + case "mph", "hybrid": + indexMatcher = strmatcher.NewMphIndexMatcher() + case "linear": + indexMatcher = strmatcher.NewLinearIndexMatcher() + default: + indexMatcher = strmatcher.NewLinearIndexMatcher() + } + for _, domain := range domains { + matcher, err := domainToMatcher(domain) if err != nil { return nil, err } + indexMatcher.Add(matcher) } - g.Build() - return &DomainMatcher{ - matchers: g, - }, nil + if err := indexMatcher.Build(); err != nil { + return nil, err + } + return &DomainMatcher{matcher: indexMatcher}, nil } -func NewDomainMatcher(domains []*routercommon.Domain) (*DomainMatcher, error) { - g := new(strmatcher.MatcherGroup) - for _, d := range domains { - m, err := domainToMatcher(d) - if err != nil { - return nil, err - } - g.Add(m) - } - - return &DomainMatcher{ - matchers: g, - }, nil -} - -func (m *DomainMatcher) ApplyDomain(domain string) bool { - return len(m.matchers.Match(strings.ToLower(domain))) > 0 +// Match reports whether domain matches any configured rule. The domain is lowercased before lookup so that matching stays case-insensitive on the input. +func (m *DomainMatcher) Match(domain string) bool { + return m.matcher.MatchAny(strings.ToLower(domain)) } // Apply implements Condition. 
@@ -110,7 +100,7 @@ func (m *DomainMatcher) Apply(ctx routing.Context) bool { if len(domain) == 0 { return false } - return m.ApplyDomain(domain) + return m.Match(domain) } type MultiGeoIPMatcher struct { diff --git a/app/router/condition_test.go b/app/router/condition_test.go index ae3732f66..44e264f00 100644 --- a/app/router/condition_test.go +++ b/app/router/condition_test.go @@ -375,9 +375,9 @@ func TestChinaSites(t *testing.T) { domains, err := loadGeoSite("CN") common.Must(err) - matcher, err := router.NewDomainMatcher(domains) + matcher, err := router.NewDomainMatcher("linear", domains) common.Must(err) - acMatcher, err := router.NewMphMatcherGroup(domains) + mphMatcher, err := router.NewDomainMatcher("mph", domains) common.Must(err) type TestCase struct { @@ -408,8 +408,8 @@ func TestChinaSites(t *testing.T) { } for _, testCase := range testCases { - r1 := matcher.ApplyDomain(testCase.Domain) - r2 := acMatcher.ApplyDomain(testCase.Domain) + r1 := matcher.Match(testCase.Domain) + r2 := mphMatcher.Match(testCase.Domain) if r1 != testCase.Output { t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1) } else if r2 != testCase.Output { @@ -422,7 +422,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) { domains, err := loadGeoSite("CN") common.Must(err) - matcher, err := router.NewMphMatcherGroup(domains) + matcher, err := router.NewDomainMatcher("mph", domains) common.Must(err) type TestCase struct { @@ -455,7 +455,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { for _, testCase := range testCases { - _ = matcher.ApplyDomain(testCase.Domain) + _ = matcher.Match(testCase.Domain) } } } @@ -464,7 +464,7 @@ func BenchmarkDomainMatcher(b *testing.B) { domains, err := loadGeoSite("CN") common.Must(err) - matcher, err := router.NewDomainMatcher(domains) + matcher, err := router.NewDomainMatcher("linear", domains) common.Must(err) type TestCase struct { @@ -497,7 +497,7 @@ 
func BenchmarkDomainMatcher(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { for _, testCase := range testCases { - _ = matcher.ApplyDomain(testCase.Domain) + _ = matcher.Match(testCase.Domain) } } } diff --git a/app/router/config.go b/app/router/config.go index cf943771b..908c6eff4 100644 --- a/app/router/config.go +++ b/app/router/config.go @@ -39,23 +39,11 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) { conds := NewConditionChan() if len(rr.Domain) > 0 { - switch rr.DomainMatcher { - case "mph", "hybrid": - matcher, err := NewMphMatcherGroup(rr.Domain) - if err != nil { - return nil, newError("failed to build domain condition with MphDomainMatcher").Base(err) - } - newError("MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)").AtDebug().WriteToLog() - conds.Add(matcher) - case "linear": - fallthrough - default: - matcher, err := NewDomainMatcher(rr.Domain) - if err != nil { - return nil, newError("failed to build domain condition").Base(err) - } - conds.Add(matcher) + cond, err := NewDomainMatcher(rr.DomainMatcher, rr.Domain) + if err != nil { + return nil, newError("failed to build domain condition").Base(err) } + conds.Add(cond) } if len(rr.UserEmail) > 0 { diff --git a/app/stats/command/command.go b/app/stats/command/command.go index 20ce33d7d..b91351b5b 100644 --- a/app/stats/command/command.go +++ b/app/stats/command/command.go @@ -49,7 +49,7 @@ func (s *statsServer) GetStats(ctx context.Context, request *GetStatsRequest) (* } func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) { - mgroup := &strmatcher.MatcherGroup{} + mgroup := &strmatcher.LinearIndexMatcher{} if request.Pattern != "" { request.Patterns = append(request.Patterns, request.Pattern) } diff --git a/common/strmatcher/benchmark_test.go b/common/strmatcher/benchmark_test.go index 930fa0b9e..8e15daf39 100644 --- a/common/strmatcher/benchmark_test.go +++ b/common/strmatcher/benchmark_test.go @@ 
-8,16 +8,18 @@ import ( . "github.com/v2fly/v2ray-core/v4/common/strmatcher" ) -func BenchmarkACAutomaton(b *testing.B) { - ac := NewACAutomaton() +// Benchmark Domain Matcher Groups + +func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) { + g := new(SimpleMatcherGroup) + for i := 1; i <= 1024; i++ { - ac.Add(strconv.Itoa(i)+".v2fly.org", Domain) + AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) } - ac.Build() b.ResetTimer() for i := 0; i < b.N; i++ { - _ = ac.Match("0.v2fly.org") + _ = g.Match("0.v2fly.org") } } @@ -25,7 +27,48 @@ func BenchmarkDomainMatcherGroup(b *testing.B) { g := new(DomainMatcherGroup) for i := 1; i <= 1024; i++ { - g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i)) + AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = g.Match("0.v2fly.org") + } +} + +func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) { + ac := NewACAutomatonMatcherGroup() + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + ac.Build() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ac.MatchAny("0.v2fly.org") + } +} + +func BenchmarkMphMatcherGroupForDomain(b *testing.B) { + mph := NewMphMatcherGroup() + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + mph.Build() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = mph.MatchAny("0.v2fly.org") + } +} + +// Benchmark Full Matcher Groups + +func BenchmarkSimpleMatcherGroupForFull(b *testing.B) { + g := new(SimpleMatcherGroup) + + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) } b.ResetTimer() @@ -38,7 +81,7 @@ func BenchmarkFullMatcherGroup(b *testing.B) { g := new(FullMatcherGroup) for i := 1; i <= 1024; i++ { - g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i)) + AddMatcherToGroup(g, 
FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) } b.ResetTimer() @@ -47,8 +90,64 @@ func BenchmarkFullMatcherGroup(b *testing.B) { } } -func BenchmarkMarchGroup(b *testing.B) { - g := new(MatcherGroup) +func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) { + ac := NewACAutomatonMatcherGroup() + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + ac.Build() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ac.MatchAny("0.v2fly.org") + } +} + +func BenchmarkMphMatcherGroupFull(b *testing.B) { + mph := NewMphMatcherGroup() + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + mph.Build() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = mph.MatchAny("0.v2fly.org") + } +} + +// Benchmark Substr Matcher Groups + +func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) { + g := new(SimpleMatcherGroup) + + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = g.Match("0.v2fly.org") + } +} + +func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) { + ac := NewACAutomatonMatcherGroup() + for i := 1; i <= 1024; i++ { + AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i)) + } + ac.Build() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = ac.MatchAny("0.v2fly.org") + } +} + +// Benchmark Index Matchers + +func BenchmarkLinearIndexMatcher(b *testing.B) { + g := new(LinearIndexMatcher) for i := 1; i <= 1024; i++ { m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org") common.Must(err) diff --git a/common/strmatcher/full_matcher.go b/common/strmatcher/full_matcher.go deleted file mode 100644 index e00d02aa9..000000000 --- a/common/strmatcher/full_matcher.go +++ /dev/null @@ -1,25 +0,0 @@ -package strmatcher - -type FullMatcherGroup struct { - matchers map[string][]uint32 -} - -func (g 
*FullMatcherGroup) Add(domain string, value uint32) { - if g.matchers == nil { - g.matchers = make(map[string][]uint32) - } - - g.matchers[domain] = append(g.matchers[domain], value) -} - -func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) { - g.Add(string(m), value) -} - -func (g *FullMatcherGroup) Match(str string) []uint32 { - if g.matchers == nil { - return nil - } - - return g.matchers[str] -} diff --git a/common/strmatcher/indexmatcher_linear.go b/common/strmatcher/indexmatcher_linear.go new file mode 100644 index 000000000..da599416e --- /dev/null +++ b/common/strmatcher/indexmatcher_linear.go @@ -0,0 +1,59 @@ +package strmatcher + +// LinearIndexMatcher is an implementation of IndexMatcher. +// Empty initialization works. +type LinearIndexMatcher struct { + count uint32 + fullMatcher FullMatcherGroup + domainMatcher DomainMatcherGroup + substrMatcher SubstrMatcherGroup + otherMatchers SimpleMatcherGroup +} + +func NewLinearIndexMatcher() *LinearIndexMatcher { + return new(LinearIndexMatcher) +} + +// Add implements IndexMatcher.Add. +func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 { + g.count++ + index := g.count + + switch matcher := matcher.(type) { + case FullMatcher: + g.fullMatcher.AddFullMatcher(matcher, index) + case DomainMatcher: + g.domainMatcher.AddDomainMatcher(matcher, index) + case SubstrMatcher: + g.substrMatcher.AddSubstrMatcher(matcher, index) + default: + g.otherMatchers.AddMatcher(matcher, index) + } + + return index +} + +// Build implements IndexMatcher.Build. +func (*LinearIndexMatcher) Build() error { + return nil +} + +// Match implements IndexMatcher.Match. +func (g *LinearIndexMatcher) Match(input string) []uint32 { + result := []uint32{} + result = append(result, g.fullMatcher.Match(input)...) + result = append(result, g.domainMatcher.Match(input)...) + result = append(result, g.substrMatcher.Match(input)...) + result = append(result, g.otherMatchers.Match(input)...) 
+ return result +} + +// MatchAny implements IndexMatcher.MatchAny. +func (g *LinearIndexMatcher) MatchAny(input string) bool { + return len(g.Match(input)) > 0 +} + +// Size implements IndexMatcher.Size. +func (g *LinearIndexMatcher) Size() uint32 { + return g.count +} diff --git a/common/strmatcher/strmatcher_test.go b/common/strmatcher/indexmatcher_linear_test.go similarity index 91% rename from common/strmatcher/strmatcher_test.go rename to common/strmatcher/indexmatcher_linear_test.go index 233234584..baa74a56c 100644 --- a/common/strmatcher/strmatcher_test.go +++ b/common/strmatcher/indexmatcher_linear_test.go @@ -9,7 +9,7 @@ import ( ) // See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489 -func TestMatcherGroup(t *testing.T) { +func TestLinearIndexMatcher(t *testing.T) { rules := []struct { Type Type Domain string @@ -73,19 +73,20 @@ func TestMatcherGroup(t *testing.T) { }, { Input: "testapis.us", - Output: []uint32{1, 2, 6}, + Output: []uint32{2, 6, 1}, }, { Input: "example.com", Output: []uint32{10, 4}, }, } - matcherGroup := &MatcherGroup{} + matcherGroup := NewLinearIndexMatcher() for _, rule := range rules { matcher, err := rule.Type.New(rule.Domain) common.Must(err) matcherGroup.Add(matcher) } + matcherGroup.Build() for _, test := range cases { if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) { t.Error("unexpected output: ", m, " for test case ", test) diff --git a/common/strmatcher/indexmatcher_mph.go b/common/strmatcher/indexmatcher_mph.go new file mode 100644 index 000000000..796148065 --- /dev/null +++ b/common/strmatcher/indexmatcher_mph.go @@ -0,0 +1,80 @@ +package strmatcher + +// A MphIndexMatcher is divided into three parts: +// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table; +// 2. `substr` patterns are matched by ac automaton; +// 3. `regex` patterns are matched with the regex library. 
+type MphIndexMatcher struct { + count uint32 + mph *MphMatcherGroup + ac *ACAutomatonMatcherGroup + regex SimpleMatcherGroup +} + +func NewMphIndexMatcher() *MphIndexMatcher { + return &MphIndexMatcher{ + mph: nil, + ac: nil, + regex: SimpleMatcherGroup{}, + } +} + +// Add implements IndexMatcher.Add. +func (g *MphIndexMatcher) Add(matcher Matcher) uint32 { + g.count++ + index := g.count + + switch matcher := matcher.(type) { + case FullMatcher: + if g.mph == nil { + g.mph = NewMphMatcherGroup() + } + g.mph.AddFullMatcher(matcher, index) + case DomainMatcher: + if g.mph == nil { + g.mph = NewMphMatcherGroup() + } + g.mph.AddDomainMatcher(matcher, index) + case SubstrMatcher: + if g.ac == nil { + g.ac = NewACAutomatonMatcherGroup() + } + g.ac.AddSubstrMatcher(matcher, index) + case *RegexMatcher: + g.regex.AddMatcher(matcher, index) + } + + return index +} + +// Build implements IndexMatcher.Build. +func (g *MphIndexMatcher) Build() error { + if g.mph != nil { + g.mph.Build() + } + if g.ac != nil { + g.ac.Build() + } + return nil +} + +// Match implements IndexMatcher.Match. +func (*MphIndexMatcher) Match(string) []uint32 { + return nil +} + +// MatchAny implements IndexMatcher.MatchAny. +func (g *MphIndexMatcher) MatchAny(input string) bool { + if g.mph != nil && g.mph.MatchAny(input) { + return true + } + if g.ac != nil && g.ac.MatchAny(input) { + return true + } + return g.regex.MatchAny(input) +} + +// Size implements IndexMatcher.Size. 
+func (g *MphIndexMatcher) Size() uint32 { + return g.count +} diff --git a/common/strmatcher/ac_automaton_matcher.go b/common/strmatcher/matchergroup_ac_automation.go similarity index 68% rename from common/strmatcher/ac_automaton_matcher.go rename to common/strmatcher/matchergroup_ac_automation.go index ab7c09bdd..22f1760aa 100644 --- a/common/strmatcher/ac_automaton_matcher.go +++ b/common/strmatcher/matchergroup_ac_automation.go @@ -21,7 +21,9 @@ type Edge struct { nextNode int } -type ACAutomaton struct { +// ACAutomatonMatcherGroup is an implementation of MatcherGroup. +// It uses an Aho-Corasick automaton to provide support for Full, Domain and Substr matchers. Trie nodes are character based. +type ACAutomatonMatcherGroup struct { trie [][validCharCount]Edge fail []int exists []MatchType @@ -121,8 +123,8 @@ var char2Index = []int{ '9': 52, } -func NewACAutomaton() *ACAutomaton { - ac := new(ACAutomaton) +func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup { + ac := new(ACAutomatonMatcherGroup) ac.trie = append(ac.trie, newNode()) ac.fail = append(ac.fail, 0) ac.exists = append(ac.exists, MatchType{ @@ -132,10 +134,25 @@ func NewACAutomaton() *ACAutomaton { return ac } -func (ac *ACAutomaton) Add(domain string, t Type) { - node := 0 - for i := len(domain) - 1; i >= 0; i-- { - idx := char2Index[domain[i]] +// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher. +func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) { + ac.addPattern(0, matcher.Pattern(), matcher.Type()) +} + +// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher. +func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) { + node := ac.addPattern(0, matcher.Pattern(), Full) + ac.addPattern(node, ".", Domain) +} + +// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher. 
+func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) { + ac.addPattern(0, matcher.Pattern(), matcher.Type()) +} + +func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int { + for i := len(pattern) - 1; i >= 0; i-- { + idx := char2Index[pattern[i]] if ac.trie[node][idx].nextNode == 0 { ac.count++ if len(ac.trie) < ac.count+1 { @@ -154,42 +171,13 @@ func (ac *ACAutomaton) Add(domain string, t Type) { node = ac.trie[node][idx].nextNode } ac.exists[node] = MatchType{ - matchType: t, + matchType: matcherType, exist: true, } - switch t { - case Domain: - ac.exists[node] = MatchType{ - matchType: Full, - exist: true, - } - idx := char2Index['.'] - if ac.trie[node][idx].nextNode == 0 { - ac.count++ - if len(ac.trie) < ac.count+1 { - ac.trie = append(ac.trie, newNode()) - ac.fail = append(ac.fail, 0) - ac.exists = append(ac.exists, MatchType{ - matchType: Full, - exist: false, - }) - } - ac.trie[node][idx] = Edge{ - edgeType: TrieEdge, - nextNode: ac.count, - } - } - node = ac.trie[node][idx].nextNode - ac.exists[node] = MatchType{ - matchType: t, - exist: true, - } - default: - break - } + return node } -func (ac *ACAutomaton) Build() { +func (ac *ACAutomatonMatcherGroup) Build() { queue := list.New() for i := 0; i < validCharCount; i++ { if ac.trie[0][i].nextNode != 0 { @@ -218,7 +206,13 @@ func (ac *ACAutomaton) Build() { } } -func (ac *ACAutomaton) Match(s string) bool { +// Match implements MatcherGroup.Match. +func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 { + return nil +} + +// MatchAny implements MatcherGroup.MatchAny. +func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool { node := 0 fullMatch := true // 1. the match string is all through trie edge. 
FULL MATCH or DOMAIN diff --git a/common/strmatcher/matchergroup_ac_automation_test.go b/common/strmatcher/matchergroup_ac_automation_test.go new file mode 100644 index 000000000..a70c1167b --- /dev/null +++ b/common/strmatcher/matchergroup_ac_automation_test.go @@ -0,0 +1,183 @@ +package strmatcher_test + +import ( + "testing" + + "github.com/v2fly/v2ray-core/v4/common" + . "github.com/v2fly/v2ray-core/v4/common/strmatcher" +) + +func TestACAutomatonMatcherGroup(t *testing.T) { + cases1 := []struct { + pattern string + mType Type + input string + output bool + }{ + { + pattern: "v2fly.org", + mType: Domain, + input: "www.v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "www.v3fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "2fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "xv2fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Full, + input: "v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Full, + input: "xv2fly.org", + output: false, + }, + } + for _, test := range cases1 { + ac := NewACAutomatonMatcherGroup() + matcher, err := test.mType.New(test.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(ac, matcher, 0)) + ac.Build() + if m := ac.MatchAny(test.input); m != test.output { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + { + cases2Input := []struct { + pattern string + mType Type + }{ + { + pattern: "163.com", + mType: Domain, + }, + { + pattern: "m.126.com", + mType: Full, + }, + { + pattern: "3.com", + mType: Full, + }, + { + pattern: "google.com", + mType: Substr, + }, + { + pattern: "vgoogle.com", + mType: Substr, + }, + } + ac := NewACAutomatonMatcherGroup() + for _, test := range cases2Input { + matcher, err := test.mType.New(test.pattern) + common.Must(err) + 
common.Must(AddMatcherToGroup(ac, matcher, 0)) + } + ac.Build() + cases2Output := []struct { + pattern string + res bool + }{ + { + pattern: "126.com", + res: false, + }, + { + pattern: "m.163.com", + res: true, + }, + { + pattern: "mm163.com", + res: false, + }, + { + pattern: "m.126.com", + res: true, + }, + { + pattern: "163.com", + res: true, + }, + { + pattern: "63.com", + res: false, + }, + { + pattern: "oogle.com", + res: false, + }, + { + pattern: "vvgoogle.com", + res: true, + }, + } + for _, test := range cases2Output { + if m := ac.MatchAny(test.pattern); m != test.res { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + } + + { + cases3Input := []struct { + pattern string + mType Type + }{ + { + pattern: "video.google.com", + mType: Domain, + }, + { + pattern: "gle.com", + mType: Domain, + }, + } + ac := NewACAutomatonMatcherGroup() + for _, test := range cases3Input { + matcher, err := test.mType.New(test.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(ac, matcher, 0)) + } + ac.Build() + cases3Output := []struct { + pattern string + res bool + }{ + { + pattern: "google.com", + res: false, + }, + } + for _, test := range cases3Output { + if m := ac.MatchAny(test.pattern); m != test.res { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + } +} diff --git a/common/strmatcher/domain_matcher.go b/common/strmatcher/matchergroup_domain.go similarity index 73% rename from common/strmatcher/domain_matcher.go rename to common/strmatcher/matchergroup_domain.go index ae8e65bc2..bf8db2d6c 100644 --- a/common/strmatcher/domain_matcher.go +++ b/common/strmatcher/matchergroup_domain.go @@ -11,19 +11,20 @@ type node struct { sub map[string]*node } -// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers. -// Visible for testing only. +// DomainMatcherGroup is an implementation of MatcherGroup. +// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based. 
type DomainMatcherGroup struct { root *node } -func (g *DomainMatcherGroup) Add(domain string, value uint32) { +// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher. +func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) { if g.root == nil { g.root = new(node) } current := g.root - parts := breakDomain(domain) + parts := breakDomain(matcher.Pattern()) for i := len(parts) - 1; i >= 0; i-- { part := parts[i] if current.sub == nil { @@ -40,10 +41,7 @@ func (g *DomainMatcherGroup) Add(domain string, value uint32) { current.values = append(current.values, value) } -func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) { - g.Add(string(m), value) -} - +// Match implements MatcherGroup.Match. func (g *DomainMatcherGroup) Match(domain string) []uint32 { if domain == "" { return nil @@ -96,3 +94,8 @@ func (g *DomainMatcherGroup) Match(domain string) []uint32 { return result } } + +// MatchAny implements MatcherGroup.MatchAny. +func (g *DomainMatcherGroup) MatchAny(domain string) bool { + return len(g.Match(domain)) > 0 +} diff --git a/common/strmatcher/domain_matcher_test.go b/common/strmatcher/matchergroup_domain_test.go similarity index 68% rename from common/strmatcher/domain_matcher_test.go rename to common/strmatcher/matchergroup_domain_test.go index 482902195..acd331bbf 100644 --- a/common/strmatcher/domain_matcher_test.go +++ b/common/strmatcher/matchergroup_domain_test.go @@ -8,15 +8,39 @@ import ( ) func TestDomainMatcherGroup(t *testing.T) { - g := new(DomainMatcherGroup) - g.Add("v2fly.org", 1) - g.Add("google.com", 2) - g.Add("x.a.com", 3) - g.Add("a.b.com", 4) - g.Add("c.a.b.com", 5) - g.Add("x.y.com", 4) - g.Add("x.y.com", 6) - + patterns := []struct { + Pattern string + Value uint32 + }{ + { + Pattern: "v2fly.org", + Value: 1, + }, + { + Pattern: "google.com", + Value: 2, + }, + { + Pattern: "x.a.com", + Value: 3, + }, + { + Pattern: "a.b.com", + Value: 4, + }, + { + Pattern: "c.a.b.com", + Value: 
5, + }, + { + Pattern: "x.y.com", + Value: 4, + }, + { + Pattern: "x.y.com", + Value: 6, + }, + } testCases := []struct { Domain string Result []uint32 @@ -58,7 +82,10 @@ func TestDomainMatcherGroup(t *testing.T) { Result: []uint32{4, 6}, }, } - + g := new(DomainMatcherGroup) + for _, pattern := range patterns { + AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value) + } for _, testCase := range testCases { r := g.Match(testCase.Domain) if !reflect.DeepEqual(r, testCase.Result) { diff --git a/common/strmatcher/matchergroup_full.go b/common/strmatcher/matchergroup_full.go new file mode 100644 index 000000000..794772945 --- /dev/null +++ b/common/strmatcher/matchergroup_full.go @@ -0,0 +1,30 @@ +package strmatcher + +// FullMatcherGroup is an implementation of MatcherGroup. +// It uses a hash table to facilitate exact match lookup. +type FullMatcherGroup struct { + matchers map[string][]uint32 +} + +// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher. +func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) { + if g.matchers == nil { + g.matchers = make(map[string][]uint32) + } + + domain := matcher.Pattern() + g.matchers[domain] = append(g.matchers[domain], value) +} + +// Match implements MatcherGroup.Match. +func (g *FullMatcherGroup) Match(input string) []uint32 { + if g.matchers == nil { + return nil + } + return g.matchers[input] +} + +// MatchAny implements MatcherGroup.MatchAny. 
+func (g *FullMatcherGroup) MatchAny(input string) bool { + return len(g.Match(input)) > 0 +} diff --git a/common/strmatcher/full_matcher_test.go b/common/strmatcher/matchergroup_full_test.go similarity index 65% rename from common/strmatcher/full_matcher_test.go rename to common/strmatcher/matchergroup_full_test.go index 1d980eee7..b3a08fd1b 100644 --- a/common/strmatcher/full_matcher_test.go +++ b/common/strmatcher/matchergroup_full_test.go @@ -8,13 +8,31 @@ import ( ) func TestFullMatcherGroup(t *testing.T) { - g := new(FullMatcherGroup) - g.Add("v2fly.org", 1) - g.Add("google.com", 2) - g.Add("x.a.com", 3) - g.Add("x.y.com", 4) - g.Add("x.y.com", 6) - + patterns := []struct { + Pattern string + Value uint32 + }{ + { + Pattern: "v2fly.org", + Value: 1, + }, + { + Pattern: "google.com", + Value: 2, + }, + { + Pattern: "x.a.com", + Value: 3, + }, + { + Pattern: "x.y.com", + Value: 4, + }, + { + Pattern: "x.y.com", + Value: 6, + }, + } testCases := []struct { Domain string Result []uint32 @@ -32,7 +50,10 @@ func TestFullMatcherGroup(t *testing.T) { Result: []uint32{4, 6}, }, } - + g := new(FullMatcherGroup) + for _, pattern := range patterns { + AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value) + } for _, testCase := range testCases { r := g.Match(testCase.Domain) if !reflect.DeepEqual(r, testCase.Result) { diff --git a/common/strmatcher/mph_matcher.go b/common/strmatcher/matchergroup_mph.go similarity index 69% rename from common/strmatcher/mph_matcher.go rename to common/strmatcher/matchergroup_mph.go index 3c10cb492..0ec1146e8 100644 --- a/common/strmatcher/mph_matcher.go +++ b/common/strmatcher/matchergroup_mph.go @@ -2,7 +2,6 @@ package strmatcher import ( "math/bits" - "regexp" "sort" "strings" "unsafe" @@ -20,79 +19,44 @@ func RollingHash(s string) uint32 { return h } -// A MphMatcherGroup is divided into three parts: -// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table; -// 2. 
`substr` patterns are matched by ac automaton; -// 3. `regex` patterns are matched with the regex library. +// MphMatcherGroup is an implementation of MatcherGroup. +// It implements Rabin-Karp algorithm and minimal perfect hash table for Full and Domain matcher. type MphMatcherGroup struct { - ac *ACAutomaton - otherMatchers []matcherEntry - rules []string - level0 []uint32 - level0Mask int - level1 []uint32 - level1Mask int - count uint32 - ruleMap *map[string]uint32 -} - -func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) { - h := RollingHash(pattern) - switch t { - case Domain: - (*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.') - fallthrough - case Full: - (*g.ruleMap)[pattern] = h - default: - } + rules []string + level0 []uint32 + level0Mask int + level1 []uint32 + level1Mask int + ruleMap *map[string]uint32 } func NewMphMatcherGroup() *MphMatcherGroup { return &MphMatcherGroup{ - ac: nil, - otherMatchers: nil, - rules: nil, - level0: nil, - level0Mask: 0, - level1: nil, - level1Mask: 0, - count: 1, - ruleMap: &map[string]uint32{}, + rules: nil, + level0: nil, + level0Mask: 0, + level1: nil, + level1Mask: 0, + ruleMap: &map[string]uint32{}, } } -// AddPattern adds a pattern to MphMatcherGroup -func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) { - switch t { - case Substr: - if g.ac == nil { - g.ac = NewACAutomaton() - } - g.ac.Add(pattern, t) - case Full, Domain: - pattern = strings.ToLower(pattern) - g.AddFullOrDomainPattern(pattern, t) - case Regex: - r, err := regexp.Compile(pattern) - if err != nil { - return 0, err - } - g.otherMatchers = append(g.otherMatchers, matcherEntry{ - m: &regexMatcher{pattern: r}, - id: g.count, - }) - default: - panic("Unknown type") - } - return g.count, nil +// AddFullMatcher implements MatcherGroupForFull. 
+func (g *MphMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) { + pattern := strings.ToLower(matcher.Pattern()) + (*g.ruleMap)[pattern] = RollingHash(pattern) } -// Build builds a minimal perfect hash table and ac automaton from insert rules +// AddDomainMatcher implements MatcherGroupForDomain. +func (g *MphMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) { + pattern := strings.ToLower(matcher.Pattern()) + h := RollingHash(pattern) + (*g.ruleMap)[pattern] = h + (*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.') +} + +// Build builds a minimal perfect hash table for insert rules. func (g *MphMatcherGroup) Build() { - if g.ac != nil { - g.ac.Build() - } keyLen := len(*g.ruleMap) if keyLen == 0 { keyLen = 1 @@ -127,7 +91,7 @@ func (g *MphMatcherGroup) Build() { findSeed := true tmpOcc = tmpOcc[:0] for _, i := range bucket.vals { - n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask + n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask // nosemgrep if occ[n] { for _, n := range tmpOcc { occ[n] = false @@ -148,6 +112,34 @@ func (g *MphMatcherGroup) Build() { } } +// Lookup reports whether s, whose rolling hash is h, is one of the rules stored in the hash table. +func (g *MphMatcherGroup) Lookup(h uint32, s string) bool { + i0 := int(h) & g.level0Mask + seed := g.level0[i0] + i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask // nosemgrep + n := g.level1[i1] + return s == g.rules[int(n)] +} + +// Match implements MatcherGroup.Match. +func (*MphMatcherGroup) Match(_ string) []uint32 { + return nil +} + +// MatchAny implements MatcherGroup.MatchAny. +func (g *MphMatcherGroup) MatchAny(pattern string) bool { + hash := uint32(0) + for i := len(pattern) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(pattern[i]) + if pattern[i] == '.' 
{ + if g.Lookup(hash, pattern[i:]) { + return true + } + } + } + return g.Lookup(hash, pattern) +} + func nextPow2(v int) int { if v <= 1 { return 1 @@ -157,45 +149,6 @@ func nextPow2(v int) int { return int(n) } -// Lookup searches for s in t and returns its index and whether it was found. -func (g *MphMatcherGroup) Lookup(h uint32, s string) bool { - i0 := int(h) & g.level0Mask - seed := g.level0[i0] - i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask - n := g.level1[i1] - return s == g.rules[int(n)] -} - -// Match implements IndexMatcher.Match. -func (g *MphMatcherGroup) Match(pattern string) []uint32 { - result := []uint32{} - hash := uint32(0) - for i := len(pattern) - 1; i >= 0; i-- { - hash = hash*PrimeRK + uint32(pattern[i]) - if pattern[i] == '.' { - if g.Lookup(hash, pattern[i:]) { - result = append(result, 1) - return result - } - } - } - if g.Lookup(hash, pattern) { - result = append(result, 1) - return result - } - if g.ac != nil && g.ac.Match(pattern) { - result = append(result, 1) - return result - } - for _, e := range g.otherMatchers { - if e.m.Match(pattern) { - result = append(result, e.id) - return result - } - } - return nil -} - type indexBucket struct { n int vals []int @@ -286,7 +239,7 @@ tail: } func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { - return unsafe.Pointer(uintptr(p) + x) + return unsafe.Pointer(uintptr(p) + x) // nosemgrep } func readUnaligned32(p unsafe.Pointer) uint32 { diff --git a/common/strmatcher/matchergroup_mph_test.go b/common/strmatcher/matchergroup_mph_test.go new file mode 100644 index 000000000..88b569036 --- /dev/null +++ b/common/strmatcher/matchergroup_mph_test.go @@ -0,0 +1,174 @@ +package strmatcher_test + +import ( + "testing" + + "github.com/v2fly/v2ray-core/v4/common" + . 
"github.com/v2fly/v2ray-core/v4/common/strmatcher" +) + +func TestMphMatcherGroup(t *testing.T) { + cases1 := []struct { + pattern string + mType Type + input string + output bool + }{ + { + pattern: "v2fly.org", + mType: Domain, + input: "www.v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "www.v3fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "2fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Domain, + input: "xv2fly.org", + output: false, + }, + { + pattern: "v2fly.org", + mType: Full, + input: "v2fly.org", + output: true, + }, + { + pattern: "v2fly.org", + mType: Full, + input: "xv2fly.org", + output: false, + }, + } + for _, test := range cases1 { + mph := NewMphMatcherGroup() + matcher, err := test.mType.New(test.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(mph, matcher, 0)) + mph.Build() + if m := mph.MatchAny(test.input); m != test.output { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + { + cases2Input := []struct { + pattern string + mType Type + }{ + { + pattern: "163.com", + mType: Domain, + }, + { + pattern: "m.126.com", + mType: Full, + }, + { + pattern: "3.com", + mType: Full, + }, + } + mph := NewMphMatcherGroup() + for _, test := range cases2Input { + matcher, err := test.mType.New(test.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(mph, matcher, 0)) + } + mph.Build() + cases2Output := []struct { + pattern string + res bool + }{ + { + pattern: "126.com", + res: false, + }, + { + pattern: "m.163.com", + res: true, + }, + { + pattern: "mm163.com", + res: false, + }, + { + pattern: "m.126.com", + res: true, + }, + { + pattern: "163.com", + res: true, + }, + { + pattern: "63.com", + res: false, + }, + { + pattern: "oogle.com", + res: false, + }, + { + pattern: "vvgoogle.com", + res: false, + }, + } + for _, test := range 
cases2Output { + if m := mph.MatchAny(test.pattern); m != test.res { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + } + { + cases3Input := []struct { + pattern string + mType Type + }{ + { + pattern: "video.google.com", + mType: Domain, + }, + { + pattern: "gle.com", + mType: Domain, + }, + } + mph := NewMphMatcherGroup() + for _, test := range cases3Input { + matcher, err := test.mType.New(test.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(mph, matcher, 0)) + } + mph.Build() + cases3Output := []struct { + pattern string + res bool + }{ + { + pattern: "google.com", + res: false, + }, + } + for _, test := range cases3Output { + if m := mph.MatchAny(test.pattern); m != test.res { + t.Error("unexpected output: ", m, " for test case ", test) + } + } + } +} diff --git a/common/strmatcher/matchergroup_simple.go b/common/strmatcher/matchergroup_simple.go new file mode 100644 index 000000000..7077a274c --- /dev/null +++ b/common/strmatcher/matchergroup_simple.go @@ -0,0 +1,36 @@ +package strmatcher + +type matcherEntry struct { + matcher Matcher + value uint32 +} + +// SimpleMatcherGroup is an implementation of MatcherGroup. +// It simply stores all matchers in an array and sequentially matches them. +type SimpleMatcherGroup struct { + matchers []matcherEntry +} + +// AddMatcher implements MatcherGroupForAll.AddMatcher. +func (g *SimpleMatcherGroup) AddMatcher(matcher Matcher, value uint32) { + g.matchers = append(g.matchers, matcherEntry{ + matcher: matcher, + value: value, + }) +} + +// Match implements MatcherGroup.Match. +func (g *SimpleMatcherGroup) Match(input string) []uint32 { + result := []uint32{} + for _, e := range g.matchers { + if e.matcher.Match(input) { + result = append(result, e.value) + } + } + return result +} + +// MatchAny implements MatcherGroup.MatchAny. 
+func (g *SimpleMatcherGroup) MatchAny(input string) bool { + return len(g.Match(input)) > 0 +} diff --git a/common/strmatcher/matchergroup_simple_test.go b/common/strmatcher/matchergroup_simple_test.go new file mode 100644 index 000000000..18ef806e7 --- /dev/null +++ b/common/strmatcher/matchergroup_simple_test.go @@ -0,0 +1,69 @@ +package strmatcher_test + +import ( + "reflect" + "testing" + + "github.com/v2fly/v2ray-core/v4/common" + . "github.com/v2fly/v2ray-core/v4/common/strmatcher" +) + +func TestSimpleMatcherGroup(t *testing.T) { + patterns := []struct { + pattern string + mType Type + }{ + { + pattern: "v2fly.org", + mType: Domain, + }, + { + pattern: "v2fly.org", + mType: Full, + }, + { + pattern: "v2fly.org", + mType: Regex, + }, + } + cases := []struct { + input string + output []uint32 + }{ + { + input: "www.v2fly.org", + output: []uint32{0, 2}, + }, + { + input: "v2fly.org", + output: []uint32{0, 1, 2}, + }, + { + input: "www.v3fly.org", + output: []uint32{}, + }, + { + input: "2fly.org", + output: []uint32{}, + }, + { + input: "xv2fly.org", + output: []uint32{2}, + }, + { + input: "v2flyxorg", + output: []uint32{2}, + }, + } + matcherGroup := &SimpleMatcherGroup{} + for id, entry := range patterns { + matcher, err := entry.mType.New(entry.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id))) + } + for _, test := range cases { + if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) { + t.Error("unexpected output: ", r, " for test case ", test) + } + } +} diff --git a/common/strmatcher/matchergroup_substr.go b/common/strmatcher/matchergroup_substr.go new file mode 100644 index 000000000..1eb3d9692 --- /dev/null +++ b/common/strmatcher/matchergroup_substr.go @@ -0,0 +1,47 @@ +package strmatcher + +import ( + "sort" + "strings" +) + +// SubstrMatcherGroup is an implementation of MatcherGroup. +// It is simply implemented to comply with the priority specification of Substr matchers.
+type SubstrMatcherGroup struct { + patterns []string + values []uint32 +} + +// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher. +func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint32) { + g.patterns = append(g.patterns, matcher.Pattern()) + g.values = append(g.values, value) +} + +// Match implements MatcherGroup.Match. +func (g *SubstrMatcherGroup) Match(input string) []uint32 { + result := []uint32{} + for i, pattern := range g.patterns { + for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) { + result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit) + } + } + // Sort the match results in dictionary order, so that: + // 1. Pattern matched at smaller position (meaning matched further) takes precedence. + // 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence. + sort.Slice(result, func(i, j int) bool { return result[i] < result[j] }) + for i, entry := range result { + result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit) + } + return result +} + +// MatchAny implements MatcherGroup.MatchAny. +func (g *SubstrMatcherGroup) MatchAny(input string) bool { + for _, pattern := range g.patterns { + if strings.Contains(input, pattern) { + return true + } + } + return false +} diff --git a/common/strmatcher/matchergroup_substr_test.go b/common/strmatcher/matchergroup_substr_test.go new file mode 100644 index 000000000..37e90abb9 --- /dev/null +++ b/common/strmatcher/matchergroup_substr_test.go @@ -0,0 +1,65 @@ +package strmatcher_test + +import ( + "reflect" + "testing" + + "github.com/v2fly/v2ray-core/v4/common" + . 
"github.com/v2fly/v2ray-core/v4/common/strmatcher" +) + +func TestSubstrMatcherGroup(t *testing.T) { + patterns := []struct { + pattern string + mType Type + }{ + { + pattern: "apis", + mType: Substr, + }, + { + pattern: "google", + mType: Substr, + }, + { + pattern: "apis", + mType: Substr, + }, + } + cases := []struct { + input string + output []uint32 + }{ + { + input: "google.com", + output: []uint32{1}, + }, + { + input: "apis.com", + output: []uint32{0, 2}, + }, + { + input: "googleapis.com", + output: []uint32{1, 0, 2}, + }, + { + input: "fonts.googleapis.com", + output: []uint32{1, 0, 2}, + }, + { + input: "apis.googleapis.com", + output: []uint32{0, 2, 1, 0, 2}, + }, + } + matcherGroup := &SubstrMatcherGroup{} + for id, entry := range patterns { + matcher, err := entry.mType.New(entry.pattern) + common.Must(err) + common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id))) + } + for _, test := range cases { + if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) { + t.Error("unexpected output: ", r, " for test case ", test) + } + } +} diff --git a/common/strmatcher/matchers.go b/common/strmatcher/matchers.go index b5ab09c4c..79ed3c176 100644 --- a/common/strmatcher/matchers.go +++ b/common/strmatcher/matchers.go @@ -1,52 +1,167 @@ package strmatcher import ( + "errors" "regexp" "strings" ) -type fullMatcher string +// FullMatcher is an implementation of Matcher. +type FullMatcher string -func (m fullMatcher) Match(s string) bool { +func (FullMatcher) Type() Type { + return Full +} + +func (m FullMatcher) Pattern() string { + return string(m) +} + +func (m FullMatcher) String() string { + return "full:" + m.Pattern() +} + +func (m FullMatcher) Match(s string) bool { return string(m) == s } -func (m fullMatcher) String() string { - return "full:" + string(m) +// DomainMatcher is an implementation of Matcher. 
+type DomainMatcher string + +func (DomainMatcher) Type() Type { + return Domain } -type substrMatcher string - -func (m substrMatcher) Match(s string) bool { - return strings.Contains(s, string(m)) +func (m DomainMatcher) Pattern() string { + return string(m) } -func (m substrMatcher) String() string { - return "keyword:" + string(m) +func (m DomainMatcher) String() string { + return "domain:" + m.Pattern() } -type domainMatcher string - -func (m domainMatcher) Match(s string) bool { - pattern := string(m) +func (m DomainMatcher) Match(s string) bool { + pattern := m.Pattern() if !strings.HasSuffix(s, pattern) { return false } return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.' } -func (m domainMatcher) String() string { - return "domain:" + string(m) +// SubstrMatcher is an implementation of Matcher. +type SubstrMatcher string + +func (SubstrMatcher) Type() Type { + return Substr } -type regexMatcher struct { +func (m SubstrMatcher) Pattern() string { + return string(m) +} + +func (m SubstrMatcher) String() string { + return "keyword:" + m.Pattern() +} + +func (m SubstrMatcher) Match(s string) bool { + return strings.Contains(s, m.Pattern()) +} + +// RegexMatcher is an implementation of Matcher. +type RegexMatcher struct { pattern *regexp.Regexp } -func (m *regexMatcher) Match(s string) bool { +func (*RegexMatcher) Type() Type { + return Regex +} + +func (m *RegexMatcher) Pattern() string { + return m.pattern.String() +} + +func (m *RegexMatcher) String() string { + return "regexp:" + m.Pattern() +} + +func (m *RegexMatcher) Match(s string) bool { return m.pattern.MatchString(s) } -func (m *regexMatcher) String() string { - return "regexp:" + m.pattern.String() +// New creates a new Matcher based on the given pattern. 
+func (t Type) New(pattern string) (Matcher, error) { + switch t { + case Full: + return FullMatcher(pattern), nil + case Substr: + return SubstrMatcher(pattern), nil + case Domain: + return DomainMatcher(pattern), nil + case Regex: // 1. regex matching is case-sensitive + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + return &RegexMatcher{pattern: regex}, nil + default: + panic("Unknown type") + } +} + +// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers. +type MatcherGroupForAll interface { + AddMatcher(matcher Matcher, value uint32) +} + +// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers. +type MatcherGroupForFull interface { + AddFullMatcher(matcher FullMatcher, value uint32) +} + +// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers. +type MatcherGroupForDomain interface { + AddDomainMatcher(matcher DomainMatcher, value uint32) +} + +// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers. +type MatcherGroupForSubstr interface { + AddSubstrMatcher(matcher SubstrMatcher, value uint32) +} + +// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers. +type MatcherGroupForRegex interface { + AddRegexMatcher(matcher *RegexMatcher, value uint32) +} + +// AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup. +// It returns an error if the MatcherGroup does not accept the provided Matcher's type. +// This function is provided to help writing code to test a MatcherGroup.
+func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error { + if g, ok := g.(MatcherGroupForAll); ok { + g.AddMatcher(matcher, value) + return nil + } + switch matcher := matcher.(type) { + case FullMatcher: + if g, ok := g.(MatcherGroupForFull); ok { + g.AddFullMatcher(matcher, value) + return nil + } + case DomainMatcher: + if g, ok := g.(MatcherGroupForDomain); ok { + g.AddDomainMatcher(matcher, value) + return nil + } + case SubstrMatcher: + if g, ok := g.(MatcherGroupForSubstr); ok { + g.AddSubstrMatcher(matcher, value) + return nil + } + case *RegexMatcher: + if g, ok := g.(MatcherGroupForRegex); ok { + g.AddRegexMatcher(matcher, value) + return nil + } + } + return errors.New("cannot add matcher to matcher group") } diff --git a/common/strmatcher/matchers_test.go b/common/strmatcher/matchers_test.go index 8e549eae4..a644164f2 100644 --- a/common/strmatcher/matchers_test.go +++ b/common/strmatcher/matchers_test.go @@ -71,172 +71,3 @@ func TestMatcher(t *testing.T) { } } } - -func TestACAutomaton(t *testing.T) { - cases1 := []struct { - pattern string - mType Type - input string - output bool - }{ - { - pattern: "v2fly.org", - mType: Domain, - input: "www.v2fly.org", - output: true, - }, - { - pattern: "v2fly.org", - mType: Domain, - input: "v2fly.org", - output: true, - }, - { - pattern: "v2fly.org", - mType: Domain, - input: "www.v3fly.org", - output: false, - }, - { - pattern: "v2fly.org", - mType: Domain, - input: "2fly.org", - output: false, - }, - { - pattern: "v2fly.org", - mType: Domain, - input: "xv2fly.org", - output: false, - }, - { - pattern: "v2fly.org", - mType: Full, - input: "v2fly.org", - output: true, - }, - { - pattern: "v2fly.org", - mType: Full, - input: "xv2fly.org", - output: false, - }, - } - for _, test := range cases1 { - ac := NewACAutomaton() - ac.Add(test.pattern, test.mType) - ac.Build() - if m := ac.Match(test.input); m != test.output { - t.Error("unexpected output: ", m, " for test case ", test) - } - } - { - 
cases2Input := []struct { - pattern string - mType Type - }{ - { - pattern: "163.com", - mType: Domain, - }, - { - pattern: "m.126.com", - mType: Full, - }, - { - pattern: "3.com", - mType: Full, - }, - { - pattern: "google.com", - mType: Substr, - }, - { - pattern: "vgoogle.com", - mType: Substr, - }, - } - ac := NewACAutomaton() - for _, test := range cases2Input { - ac.Add(test.pattern, test.mType) - } - ac.Build() - cases2Output := []struct { - pattern string - res bool - }{ - { - pattern: "126.com", - res: false, - }, - { - pattern: "m.163.com", - res: true, - }, - { - pattern: "mm163.com", - res: false, - }, - { - pattern: "m.126.com", - res: true, - }, - { - pattern: "163.com", - res: true, - }, - { - pattern: "63.com", - res: false, - }, - { - pattern: "oogle.com", - res: false, - }, - { - pattern: "vvgoogle.com", - res: true, - }, - } - for _, test := range cases2Output { - if m := ac.Match(test.pattern); m != test.res { - t.Error("unexpected output: ", m, " for test case ", test) - } - } - } - - { - cases3Input := []struct { - pattern string - mType Type - }{ - { - pattern: "video.google.com", - mType: Domain, - }, - { - pattern: "gle.com", - mType: Domain, - }, - } - ac := NewACAutomaton() - for _, test := range cases3Input { - ac.Add(test.pattern, test.mType) - } - ac.Build() - cases3Output := []struct { - pattern string - res bool - }{ - { - pattern: "google.com", - res: false, - }, - } - for _, test := range cases3Output { - if m := ac.Match(test.pattern); m != test.res { - t.Error("unexpected output: ", m, " for test case ", test) - } - } - } -} diff --git a/common/strmatcher/strmatcher.go b/common/strmatcher/strmatcher.go index 294e6e73b..a399e74eb 100644 --- a/common/strmatcher/strmatcher.go +++ b/common/strmatcher/strmatcher.go @@ -1,107 +1,74 @@ package strmatcher -import ( - "regexp" -) - -// Matcher is the interface to determine a string matches a pattern. 
-type Matcher interface { - // Match returns true if the given string matches a predefined pattern. - Match(string) bool - String() string -} - // Type is the type of the matcher. type Type byte const ( // Full is the type of matcher that the input string must exactly equal to the pattern. - Full Type = iota - // Substr is the type of matcher that the input string must contain the pattern as a sub-string. - Substr + Full Type = 0 // Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern. - Domain + Domain Type = 1 + // Substr is the type of matcher that the input string must contain the pattern as a sub-string. + Substr Type = 2 // Regex is the type of matcher that the input string must matches the regular-expression pattern. - Regex + Regex Type = 3 ) -// New creates a new Matcher based on the given pattern. -func (t Type) New(pattern string) (Matcher, error) { - // 1. regex matching is case-sensitive - switch t { - case Full: - return fullMatcher(pattern), nil - case Substr: - return substrMatcher(pattern), nil - case Domain: - return domainMatcher(pattern), nil - case Regex: - r, err := regexp.Compile(pattern) - if err != nil { - return nil, err - } - return &regexMatcher{ - pattern: r, - }, nil - default: - panic("Unknown type") - } +// Matcher is the interface to determine a string matches a pattern. +// * This is a basic matcher to represent a certain kind of match semantics (full, substr, domain or regex). +type Matcher interface { + // Type returns the matcher's type. + Type() Type + + // Pattern returns the matcher's raw string representation. + Pattern() string + + // String returns a string representation of the matcher containing its type and pattern. + String() string + + // Match returns true if the given string matches a predefined pattern. + // * This method is seldom used for performance reasons + // and is generally taken over by their corresponding MatcherGroup.
+ Match(input string) bool } -// IndexMatcher is the interface for matching with a group of matchers. -type IndexMatcher interface { - // Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists. +// MatcherGroup is an advanced type of matcher to accept a bunch of basic Matchers (of certain type, not all matcher types). +// For example: +// * FullMatcherGroup accepts FullMatcher and uses a hash table to facilitate lookup. +// * DomainMatcherGroup accepts DomainMatcher and uses a trie to optimize both memory consumption and lookup speed. +type MatcherGroup interface { + // Match returns all matched matchers with their corresponding values. Match(input string) []uint32 + + // MatchAny returns true as soon as one matching matcher is found. + MatchAny(input string) bool } -type matcherEntry struct { - m Matcher - id uint32 -} - -// MatcherGroup is an implementation of IndexMatcher. -// Empty initialization works. -type MatcherGroup struct { - count uint32 - fullMatcher FullMatcherGroup - domainMatcher DomainMatcherGroup - otherMatchers []matcherEntry -} - -// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0. -func (g *MatcherGroup) Add(m Matcher) uint32 { - g.count++ - c := g.count - - switch tm := m.(type) { - case fullMatcher: - g.fullMatcher.addMatcher(tm, c) - case domainMatcher: - g.domainMatcher.addMatcher(tm, c) - default: - g.otherMatchers = append(g.otherMatchers, matcherEntry{ - m: m, - id: c, - }) - } - - return c -} - -// Match implements IndexMatcher.Match. -func (g *MatcherGroup) Match(pattern string) []uint32 { - result := []uint32{} - result = append(result, g.fullMatcher.Match(pattern)...) - result = append(result, g.domainMatcher.Match(pattern)...) - for _, e := range g.otherMatchers { - if e.m.Match(pattern) { - result = append(result, e.id) - } - } - return result -} - -// Size returns the number of matchers in the MatcherGroup. 
-func (g *MatcherGroup) Size() uint32 { - return g.count +// IndexMatcher is a general type of matcher that accepts all kinds of basic matchers. +// It should: +// * Accept all Matcher types with no exception. +// * Optimize string matching with a combination of MatcherGroups. +// * Obey certain priority order specification when returning matched Matchers. +type IndexMatcher interface { + // Size returns the number of matchers added to IndexMatcher. + Size() uint32 + + // Add adds a new Matcher to IndexMatcher, and returns its index. The index will never be 0. + Add(matcher Matcher) uint32 + + // Build builds the IndexMatcher to be ready for matching. + Build() error + + // Match returns the indices of all matchers that match the input. + // * Empty array is returned if no such matcher exists. + // * The order of returned matchers should follow priority specification. + // Priority specification: + // 1. Priority between matcher types: full > domain > substr > regex. + // 2. Priority of same-priority matchers matching at same position: the early added takes precedence. + // 3. Priority of domain matchers matching at different levels: the further matched domain takes precedence. + // 4. Priority of substr matchers matching at different positions: the further matched substr takes precedence. + Match(input string) []uint32 + + // MatchAny returns true as soon as one matching matcher is found. + MatchAny(input string) bool }