1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2025-01-04 16:37:12 -05:00

Refactor: strmatcher module (#1333)

* Reorganize strmatcher's package structure

* Rename types in strmatcher package according to their file names

* Stabilize strmatcher's Matcher interface

* Implement []matcherEntry as SimpleMatcherGroup

* Implement mph algorithm extracted from MphIndexMatcher as MphMatcherGroup

* Implement AddMatcher/AddFullMatcher/AddDomainMatcher/AddSubstrMatcher for each MatcherGroup

* Stabilize strmatcher's MatcherGroup interface

* Stabilize strmatcher's IndexMatcher interface

* Update strmatcher's benchmark

* Compatibility fix for app/router's DomainMatcher condition

* Fix code quality issue

* Fix basic matcher issues

* Update priority specification for Substr matcher
This commit is contained in:
Ye Zhihao 2021-10-31 18:01:13 +08:00 committed by GitHub
parent a66bb28aee
commit d4da365c5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 1252 additions and 545 deletions

View File

@ -98,7 +98,7 @@ func New(ctx context.Context, config *Config) (*DNS, error) {
// MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1 // MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1
matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1) matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1)
domainMatcher := &strmatcher.MatcherGroup{} domainMatcher := &strmatcher.LinearIndexMatcher{}
geoipContainer := router.GeoIPMatcherContainer{} geoipContainer := router.GeoIPMatcherContainer{}
for _, endpoint := range config.NameServers { for _, endpoint := range config.NameServers {

View File

@ -11,12 +11,12 @@ import (
// StaticHosts represents static domain-ip mapping in DNS server. // StaticHosts represents static domain-ip mapping in DNS server.
type StaticHosts struct { type StaticHosts struct {
ips [][]net.Address ips [][]net.Address
matchers *strmatcher.MatcherGroup matchers *strmatcher.LinearIndexMatcher
} }
// NewStaticHosts creates a new StaticHosts instance. // NewStaticHosts creates a new StaticHosts instance.
func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) { func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) {
g := new(strmatcher.MatcherGroup) g := new(strmatcher.LinearIndexMatcher)
sh := &StaticHosts{ sh := &StaticHosts{
ips: make([][]net.Address, len(hosts)+len(legacy)+16), ips: make([][]net.Address, len(hosts)+len(legacy)+16),
matchers: g, matchers: g,

View File

@ -64,44 +64,34 @@ func domainToMatcher(domain *routercommon.Domain) (strmatcher.Matcher, error) {
} }
type DomainMatcher struct { type DomainMatcher struct {
matchers strmatcher.IndexMatcher matcher strmatcher.IndexMatcher
} }
func NewMphMatcherGroup(domains []*routercommon.Domain) (*DomainMatcher, error) { func NewDomainMatcher(matcherType string, domains []*routercommon.Domain) (*DomainMatcher, error) {
g := strmatcher.NewMphMatcherGroup() var indexMatcher strmatcher.IndexMatcher
for _, d := range domains { switch matcherType {
matcherType, f := matcherTypeMap[d.Type] case "mph", "hybrid":
if !f { indexMatcher = strmatcher.NewMphIndexMatcher()
return nil, newError("unsupported domain type", d.Type) case "linear":
indexMatcher = strmatcher.NewLinearIndexMatcher()
default:
indexMatcher = strmatcher.NewLinearIndexMatcher()
} }
_, err := g.AddPattern(d.Value, matcherType) for _, domain := range domains {
matcher, err := domainToMatcher(domain)
if err != nil { if err != nil {
return nil, err return nil, err
} }
indexMatcher.Add(matcher)
} }
g.Build() if err := indexMatcher.Build(); err != nil {
return &DomainMatcher{
matchers: g,
}, nil
}
func NewDomainMatcher(domains []*routercommon.Domain) (*DomainMatcher, error) {
g := new(strmatcher.MatcherGroup)
for _, d := range domains {
m, err := domainToMatcher(d)
if err != nil {
return nil, err return nil, err
} }
g.Add(m) return &DomainMatcher{matcher: indexMatcher}, nil
}
return &DomainMatcher{
matchers: g,
}, nil
} }
func (m *DomainMatcher) ApplyDomain(domain string) bool { func (m *DomainMatcher) Match(domain string) bool {
return len(m.matchers.Match(strings.ToLower(domain))) > 0 return m.matcher.MatchAny(domain)
} }
// Apply implements Condition. // Apply implements Condition.
@ -110,7 +100,7 @@ func (m *DomainMatcher) Apply(ctx routing.Context) bool {
if len(domain) == 0 { if len(domain) == 0 {
return false return false
} }
return m.ApplyDomain(domain) return m.Match(domain)
} }
type MultiGeoIPMatcher struct { type MultiGeoIPMatcher struct {

View File

@ -375,9 +375,9 @@ func TestChinaSites(t *testing.T) {
domains, err := loadGeoSite("CN") domains, err := loadGeoSite("CN")
common.Must(err) common.Must(err)
matcher, err := router.NewDomainMatcher(domains) matcher, err := router.NewDomainMatcher("linear", domains)
common.Must(err) common.Must(err)
acMatcher, err := router.NewMphMatcherGroup(domains) mphMatcher, err := router.NewDomainMatcher("mph", domains)
common.Must(err) common.Must(err)
type TestCase struct { type TestCase struct {
@ -408,8 +408,8 @@ func TestChinaSites(t *testing.T) {
} }
for _, testCase := range testCases { for _, testCase := range testCases {
r1 := matcher.ApplyDomain(testCase.Domain) r1 := matcher.Match(testCase.Domain)
r2 := acMatcher.ApplyDomain(testCase.Domain) r2 := mphMatcher.Match(testCase.Domain)
if r1 != testCase.Output { if r1 != testCase.Output {
t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1) t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1)
} else if r2 != testCase.Output { } else if r2 != testCase.Output {
@ -422,7 +422,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
domains, err := loadGeoSite("CN") domains, err := loadGeoSite("CN")
common.Must(err) common.Must(err)
matcher, err := router.NewMphMatcherGroup(domains) matcher, err := router.NewDomainMatcher("mph", domains)
common.Must(err) common.Must(err)
type TestCase struct { type TestCase struct {
@ -455,7 +455,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, testCase := range testCases { for _, testCase := range testCases {
_ = matcher.ApplyDomain(testCase.Domain) _ = matcher.Match(testCase.Domain)
} }
} }
} }
@ -464,7 +464,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
domains, err := loadGeoSite("CN") domains, err := loadGeoSite("CN")
common.Must(err) common.Must(err)
matcher, err := router.NewDomainMatcher(domains) matcher, err := router.NewDomainMatcher("linear", domains)
common.Must(err) common.Must(err)
type TestCase struct { type TestCase struct {
@ -497,7 +497,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, testCase := range testCases { for _, testCase := range testCases {
_ = matcher.ApplyDomain(testCase.Domain) _ = matcher.Match(testCase.Domain)
} }
} }
} }

View File

@ -39,23 +39,11 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) {
conds := NewConditionChan() conds := NewConditionChan()
if len(rr.Domain) > 0 { if len(rr.Domain) > 0 {
switch rr.DomainMatcher { cond, err := NewDomainMatcher(rr.DomainMatcher, rr.Domain)
case "mph", "hybrid":
matcher, err := NewMphMatcherGroup(rr.Domain)
if err != nil {
return nil, newError("failed to build domain condition with MphDomainMatcher").Base(err)
}
newError("MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)").AtDebug().WriteToLog()
conds.Add(matcher)
case "linear":
fallthrough
default:
matcher, err := NewDomainMatcher(rr.Domain)
if err != nil { if err != nil {
return nil, newError("failed to build domain condition").Base(err) return nil, newError("failed to build domain condition").Base(err)
} }
conds.Add(matcher) conds.Add(cond)
}
} }
if len(rr.UserEmail) > 0 { if len(rr.UserEmail) > 0 {

View File

@ -49,7 +49,7 @@ func (s *statsServer) GetStats(ctx context.Context, request *GetStatsRequest) (*
} }
func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) { func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) {
mgroup := &strmatcher.MatcherGroup{} mgroup := &strmatcher.LinearIndexMatcher{}
if request.Pattern != "" { if request.Pattern != "" {
request.Patterns = append(request.Patterns, request.Pattern) request.Patterns = append(request.Patterns, request.Pattern)
} }

View File

@ -8,16 +8,18 @@ import (
. "github.com/v2fly/v2ray-core/v4/common/strmatcher" . "github.com/v2fly/v2ray-core/v4/common/strmatcher"
) )
func BenchmarkACAutomaton(b *testing.B) { // Benchmark Domain Matcher Groups
ac := NewACAutomaton()
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
g := new(SimpleMatcherGroup)
for i := 1; i <= 1024; i++ { for i := 1; i <= 1024; i++ {
ac.Add(strconv.Itoa(i)+".v2fly.org", Domain) AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
} }
ac.Build()
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = ac.Match("0.v2fly.org") _ = g.Match("0.v2fly.org")
} }
} }
@ -25,7 +27,48 @@ func BenchmarkDomainMatcherGroup(b *testing.B) {
g := new(DomainMatcherGroup) g := new(DomainMatcherGroup)
for i := 1; i <= 1024; i++ { for i := 1; i <= 1024; i++ {
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i)) AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = g.Match("0.v2fly.org")
}
}
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
ac := NewACAutomatonMatcherGroup()
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
ac.Build()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = ac.MatchAny("0.v2fly.org")
}
}
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
mph := NewMphMatcherGroup()
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
mph.Build()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = mph.MatchAny("0.v2fly.org")
}
}
// Benchmark Full Matcher Groups
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
g := new(SimpleMatcherGroup)
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
} }
b.ResetTimer() b.ResetTimer()
@ -38,7 +81,7 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
g := new(FullMatcherGroup) g := new(FullMatcherGroup)
for i := 1; i <= 1024; i++ { for i := 1; i <= 1024; i++ {
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i)) AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
} }
b.ResetTimer() b.ResetTimer()
@ -47,8 +90,64 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
} }
} }
func BenchmarkMarchGroup(b *testing.B) { func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
g := new(MatcherGroup) ac := NewACAutomatonMatcherGroup()
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
ac.Build()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = ac.MatchAny("0.v2fly.org")
}
}
func BenchmarkMphMatcherGroupFull(b *testing.B) {
mph := NewMphMatcherGroup()
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
mph.Build()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = mph.MatchAny("0.v2fly.org")
}
}
// Benchmark Substr Matcher Groups
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
g := new(SimpleMatcherGroup)
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = g.Match("0.v2fly.org")
}
}
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
ac := NewACAutomatonMatcherGroup()
for i := 1; i <= 1024; i++ {
AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
}
ac.Build()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = ac.MatchAny("0.v2fly.org")
}
}
// Benchmark Index Matchers
func BenchmarkLinearIndexMatcher(b *testing.B) {
g := new(LinearIndexMatcher)
for i := 1; i <= 1024; i++ { for i := 1; i <= 1024; i++ {
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org") m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
common.Must(err) common.Must(err)

View File

@ -1,25 +0,0 @@
package strmatcher
type FullMatcherGroup struct {
matchers map[string][]uint32
}
func (g *FullMatcherGroup) Add(domain string, value uint32) {
if g.matchers == nil {
g.matchers = make(map[string][]uint32)
}
g.matchers[domain] = append(g.matchers[domain], value)
}
func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) {
g.Add(string(m), value)
}
func (g *FullMatcherGroup) Match(str string) []uint32 {
if g.matchers == nil {
return nil
}
return g.matchers[str]
}

View File

@ -0,0 +1,59 @@
package strmatcher
// LinearIndexMatcher is an IndexMatcher implementation that keeps one matcher
// group per matcher kind and consults those groups one after another.
// The zero value is ready to use.
type LinearIndexMatcher struct {
	count         uint32             // number of matchers added so far; also the last index handed out
	fullMatcher   FullMatcherGroup   // receives every FullMatcher
	domainMatcher DomainMatcherGroup // receives every DomainMatcher
	substrMatcher SubstrMatcherGroup // receives every SubstrMatcher
	otherMatchers SimpleMatcherGroup // receives matchers of any other type
}

// NewLinearIndexMatcher returns an empty LinearIndexMatcher.
func NewLinearIndexMatcher() *LinearIndexMatcher {
	return &LinearIndexMatcher{}
}

// Add implements IndexMatcher.Add.
//
// The matcher is stored in the group that corresponds to its concrete type and
// the index assigned to it is returned. Indices start at 1 and grow by one per
// call.
func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
	g.count++

	switch m := matcher.(type) {
	case FullMatcher:
		g.fullMatcher.AddFullMatcher(m, g.count)
	case DomainMatcher:
		g.domainMatcher.AddDomainMatcher(m, g.count)
	case SubstrMatcher:
		g.substrMatcher.AddSubstrMatcher(m, g.count)
	default:
		g.otherMatchers.AddMatcher(m, g.count)
	}

	return g.count
}

// Build implements IndexMatcher.Build.
//
// It does no work and always returns nil.
func (g *LinearIndexMatcher) Build() error {
	return nil
}

// Match implements IndexMatcher.Match.
//
// The returned indices are grouped by matcher kind in the order full, domain,
// substr, other. The result is never nil; it is an empty slice when nothing
// matches.
func (g *LinearIndexMatcher) Match(input string) []uint32 {
	hits := make([]uint32, 0)
	for _, part := range [][]uint32{
		g.fullMatcher.Match(input),
		g.domainMatcher.Match(input),
		g.substrMatcher.Match(input),
		g.otherMatchers.Match(input),
	} {
		hits = append(hits, part...)
	}
	return hits
}

// MatchAny implements IndexMatcher.MatchAny.
//
// It reports whether Match yields at least one index for input.
func (g *LinearIndexMatcher) MatchAny(input string) bool {
	hits := g.Match(input)
	return len(hits) != 0
}

// Size implements IndexMatcher.Size.
//
// It returns how many matchers have been added.
func (g *LinearIndexMatcher) Size() uint32 {
	return g.count
}

View File

@ -9,7 +9,7 @@ import (
) )
// See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489 // See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489
func TestMatcherGroup(t *testing.T) { func TestLinearIndexMatcher(t *testing.T) {
rules := []struct { rules := []struct {
Type Type Type Type
Domain string Domain string
@ -73,19 +73,20 @@ func TestMatcherGroup(t *testing.T) {
}, },
{ {
Input: "testapis.us", Input: "testapis.us",
Output: []uint32{1, 2, 6}, Output: []uint32{2, 6, 1},
}, },
{ {
Input: "example.com", Input: "example.com",
Output: []uint32{10, 4}, Output: []uint32{10, 4},
}, },
} }
matcherGroup := &MatcherGroup{} matcherGroup := NewLinearIndexMatcher()
for _, rule := range rules { for _, rule := range rules {
matcher, err := rule.Type.New(rule.Domain) matcher, err := rule.Type.New(rule.Domain)
common.Must(err) common.Must(err)
matcherGroup.Add(matcher) matcherGroup.Add(matcher)
} }
matcherGroup.Build()
for _, test := range cases { for _, test := range cases {
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) { if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
t.Error("unexpected output: ", m, " for test case ", test) t.Error("unexpected output: ", m, " for test case ", test)

View File

@ -0,0 +1,80 @@
package strmatcher
// MphIndexMatcher is an IndexMatcher split into three parts:
//  1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
//  2. `substr` patterns are matched by ac automaton;
//  3. `regex` patterns are matched with the regex library.
type MphIndexMatcher struct {
	count uint32                   // number of Add calls so far; also the last index handed out
	mph   *MphMatcherGroup         // full and domain patterns; nil until the first one is added
	ac    *ACAutomatonMatcherGroup // substr patterns; nil until the first one is added
	regex SimpleMatcherGroup       // regex patterns
}

// NewMphIndexMatcher returns an empty MphIndexMatcher. The mph and ac groups
// are left nil and are only created once a matcher needs them.
func NewMphIndexMatcher() *MphIndexMatcher {
	return new(MphIndexMatcher)
}

// Add implements IndexMatcher.Add.
//
// The matcher is routed to the group that handles its concrete type, creating
// that group on first use. A matcher of any type not listed in the switch is
// not stored, although an index is still consumed and returned for it.
func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
	g.count++
	id := g.count

	switch m := matcher.(type) {
	case FullMatcher:
		if g.mph == nil {
			g.mph = NewMphMatcherGroup()
		}
		g.mph.AddFullMatcher(m, id)
	case DomainMatcher:
		if g.mph == nil {
			g.mph = NewMphMatcherGroup()
		}
		g.mph.AddDomainMatcher(m, id)
	case SubstrMatcher:
		if g.ac == nil {
			g.ac = NewACAutomatonMatcherGroup()
		}
		g.ac.AddSubstrMatcher(m, id)
	case *RegexMatcher:
		g.regex.AddMatcher(m, id)
	}

	return id
}

// Build implements IndexMatcher.Build.
//
// It builds whichever of the mph and ac groups have been created and always
// returns nil.
func (g *MphIndexMatcher) Build() error {
	if g.mph != nil {
		g.mph.Build()
	}
	if g.ac != nil {
		g.ac.Build()
	}
	return nil
}

// Match implements IndexMatcher.Match.
//
// It always returns nil; use MatchAny to query this matcher.
func (g *MphIndexMatcher) Match(_ string) []uint32 {
	return nil
}

// MatchAny implements IndexMatcher.MatchAny.
//
// Groups are consulted in the order mph, ac, regex, stopping at the first one
// that reports a match. Groups that were never created are skipped.
func (g *MphIndexMatcher) MatchAny(input string) bool {
	return (g.mph != nil && g.mph.MatchAny(input)) ||
		(g.ac != nil && g.ac.MatchAny(input)) ||
		g.regex.MatchAny(input)
}

// Size implements IndexMatcher.Size.
//
// It returns how many times Add has been called.
func (g *MphIndexMatcher) Size() uint32 {
	return g.count
}

View File

@ -21,7 +21,9 @@ type Edge struct {
nextNode int nextNode int
} }
type ACAutomaton struct { // ACAutomatonMatcherGroup is an implementation of MatcherGroup.
// It uses an AC automaton to provide support for Full, Domain and Substr matcher. Trie node is char based.
type ACAutomatonMatcherGroup struct {
trie [][validCharCount]Edge trie [][validCharCount]Edge
fail []int fail []int
exists []MatchType exists []MatchType
@ -121,8 +123,8 @@ var char2Index = []int{
'9': 52, '9': 52,
} }
func NewACAutomaton() *ACAutomaton { func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
ac := new(ACAutomaton) ac := new(ACAutomatonMatcherGroup)
ac.trie = append(ac.trie, newNode()) ac.trie = append(ac.trie, newNode())
ac.fail = append(ac.fail, 0) ac.fail = append(ac.fail, 0)
ac.exists = append(ac.exists, MatchType{ ac.exists = append(ac.exists, MatchType{
@ -132,64 +134,50 @@ func NewACAutomaton() *ACAutomaton {
return ac return ac
} }
func (ac *ACAutomaton) Add(domain string, t Type) { // AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
node := 0 func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
for i := len(domain) - 1; i >= 0; i-- { ac.addPattern(0, matcher.Pattern(), matcher.Type())
idx := char2Index[domain[i]]
if ac.trie[node][idx].nextNode == 0 {
ac.count++
if len(ac.trie) < ac.count+1 {
ac.trie = append(ac.trie, newNode())
ac.fail = append(ac.fail, 0)
ac.exists = append(ac.exists, MatchType{
matchType: Full,
exist: false,
})
}
ac.trie[node][idx] = Edge{
edgeType: TrieEdge,
nextNode: ac.count,
}
}
node = ac.trie[node][idx].nextNode
}
ac.exists[node] = MatchType{
matchType: t,
exist: true,
}
switch t {
case Domain:
ac.exists[node] = MatchType{
matchType: Full,
exist: true,
}
idx := char2Index['.']
if ac.trie[node][idx].nextNode == 0 {
ac.count++
if len(ac.trie) < ac.count+1 {
ac.trie = append(ac.trie, newNode())
ac.fail = append(ac.fail, 0)
ac.exists = append(ac.exists, MatchType{
matchType: Full,
exist: false,
})
}
ac.trie[node][idx] = Edge{
edgeType: TrieEdge,
nextNode: ac.count,
}
}
node = ac.trie[node][idx].nextNode
ac.exists[node] = MatchType{
matchType: t,
exist: true,
}
default:
break
}
} }
func (ac *ACAutomaton) Build() { // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
node := ac.addPattern(0, matcher.Pattern(), Full)
ac.addPattern(node, ".", Domain)
}
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) {
ac.addPattern(0, matcher.Pattern(), matcher.Type())
}
func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int {
for i := len(pattern) - 1; i >= 0; i-- {
idx := char2Index[pattern[i]]
if ac.trie[node][idx].nextNode == 0 {
ac.count++
if len(ac.trie) < ac.count+1 {
ac.trie = append(ac.trie, newNode())
ac.fail = append(ac.fail, 0)
ac.exists = append(ac.exists, MatchType{
matchType: Full,
exist: false,
})
}
ac.trie[node][idx] = Edge{
edgeType: TrieEdge,
nextNode: ac.count,
}
}
node = ac.trie[node][idx].nextNode
}
ac.exists[node] = MatchType{
matchType: matcherType,
exist: true,
}
return node
}
func (ac *ACAutomatonMatcherGroup) Build() {
queue := list.New() queue := list.New()
for i := 0; i < validCharCount; i++ { for i := 0; i < validCharCount; i++ {
if ac.trie[0][i].nextNode != 0 { if ac.trie[0][i].nextNode != 0 {
@ -218,7 +206,13 @@ func (ac *ACAutomaton) Build() {
} }
} }
func (ac *ACAutomaton) Match(s string) bool { // Match implements MatcherGroup.Match.
func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 {
return nil
}
// MatchAny implements MatcherGroup.MatchAny.
func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool {
node := 0 node := 0
fullMatch := true fullMatch := true
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN // 1. the match string is all through trie edge. FULL MATCH or DOMAIN

View File

@ -0,0 +1,183 @@
package strmatcher_test
import (
"testing"
"github.com/v2fly/v2ray-core/v4/common"
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
)
// TestACAutomatonMatcherGroup exercises ACAutomatonMatcherGroup.MatchAny in
// three parts: single-pattern automatons, one automaton mixing Domain, Full
// and Substr patterns, and one automaton holding two Domain patterns.
func TestACAutomatonMatcherGroup(t *testing.T) {
	// Part 1: every case gets a fresh automaton built from exactly one pattern.
	cases1 := []struct {
		pattern string
		mType   Type
		input   string
		output  bool
	}{
		{pattern: "v2fly.org", mType: Domain, input: "www.v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Domain, input: "v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Domain, input: "www.v3fly.org", output: false},
		{pattern: "v2fly.org", mType: Domain, input: "2fly.org", output: false},
		{pattern: "v2fly.org", mType: Domain, input: "xv2fly.org", output: false},
		{pattern: "v2fly.org", mType: Full, input: "v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Full, input: "xv2fly.org", output: false},
	}
	for _, test := range cases1 {
		ac := NewACAutomatonMatcherGroup()
		matcher, err := test.mType.New(test.pattern)
		common.Must(err)
		common.Must(AddMatcherToGroup(ac, matcher, 0))
		ac.Build()
		got := ac.MatchAny(test.input)
		if got != test.output {
			t.Error("unexpected output: ", got, " for test case ", test)
		}
	}

	// Parts 2 and 3 share one shape: load a rule set into a single automaton,
	// build it once, then probe it with several inputs.
	type rule struct {
		pattern string
		mType   Type
	}
	type probe struct {
		pattern string
		res     bool
	}
	check := func(rules []rule, probes []probe) {
		ac := NewACAutomatonMatcherGroup()
		for _, r := range rules {
			matcher, err := r.mType.New(r.pattern)
			common.Must(err)
			common.Must(AddMatcherToGroup(ac, matcher, 0))
		}
		ac.Build()
		for _, p := range probes {
			if got := ac.MatchAny(p.pattern); got != p.res {
				t.Error("unexpected output: ", got, " for test case ", p)
			}
		}
	}

	// Part 2: Domain, Full and Substr patterns living in the same automaton.
	check(
		[]rule{
			{pattern: "163.com", mType: Domain},
			{pattern: "m.126.com", mType: Full},
			{pattern: "3.com", mType: Full},
			{pattern: "google.com", mType: Substr},
			{pattern: "vgoogle.com", mType: Substr},
		},
		[]probe{
			{pattern: "126.com", res: false},
			{pattern: "m.163.com", res: true},
			{pattern: "mm163.com", res: false},
			{pattern: "m.126.com", res: true},
			{pattern: "163.com", res: true},
			{pattern: "63.com", res: false},
			{pattern: "oogle.com", res: false},
			{pattern: "vvgoogle.com", res: true},
		},
	)

	// Part 3: two Domain patterns, one of which is a textual suffix of the
	// probed input without sitting on a label boundary.
	check(
		[]rule{
			{pattern: "video.google.com", mType: Domain},
			{pattern: "gle.com", mType: Domain},
		},
		[]probe{
			{pattern: "google.com", res: false},
		},
	)
}

View File

@ -11,19 +11,20 @@ type node struct {
sub map[string]*node sub map[string]*node
} }
// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers. // DomainMatcherGroup is an implementation of MatcherGroup.
// Visible for testing only. // It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
type DomainMatcherGroup struct { type DomainMatcherGroup struct {
root *node root *node
} }
func (g *DomainMatcherGroup) Add(domain string, value uint32) { // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
if g.root == nil { if g.root == nil {
g.root = new(node) g.root = new(node)
} }
current := g.root current := g.root
parts := breakDomain(domain) parts := breakDomain(matcher.Pattern())
for i := len(parts) - 1; i >= 0; i-- { for i := len(parts) - 1; i >= 0; i-- {
part := parts[i] part := parts[i]
if current.sub == nil { if current.sub == nil {
@ -40,10 +41,7 @@ func (g *DomainMatcherGroup) Add(domain string, value uint32) {
current.values = append(current.values, value) current.values = append(current.values, value)
} }
func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) { // Match implements MatcherGroup.Match.
g.Add(string(m), value)
}
func (g *DomainMatcherGroup) Match(domain string) []uint32 { func (g *DomainMatcherGroup) Match(domain string) []uint32 {
if domain == "" { if domain == "" {
return nil return nil
@ -96,3 +94,8 @@ func (g *DomainMatcherGroup) Match(domain string) []uint32 {
return result return result
} }
} }
// MatchAny implements MatcherGroup.MatchAny.
func (g *DomainMatcherGroup) MatchAny(domain string) bool {
return len(g.Match(domain)) > 0
}

View File

@ -8,15 +8,39 @@ import (
) )
func TestDomainMatcherGroup(t *testing.T) { func TestDomainMatcherGroup(t *testing.T) {
g := new(DomainMatcherGroup) patterns := []struct {
g.Add("v2fly.org", 1) Pattern string
g.Add("google.com", 2) Value uint32
g.Add("x.a.com", 3) }{
g.Add("a.b.com", 4) {
g.Add("c.a.b.com", 5) Pattern: "v2fly.org",
g.Add("x.y.com", 4) Value: 1,
g.Add("x.y.com", 6) },
{
Pattern: "google.com",
Value: 2,
},
{
Pattern: "x.a.com",
Value: 3,
},
{
Pattern: "a.b.com",
Value: 4,
},
{
Pattern: "c.a.b.com",
Value: 5,
},
{
Pattern: "x.y.com",
Value: 4,
},
{
Pattern: "x.y.com",
Value: 6,
},
}
testCases := []struct { testCases := []struct {
Domain string Domain string
Result []uint32 Result []uint32
@ -58,7 +82,10 @@ func TestDomainMatcherGroup(t *testing.T) {
Result: []uint32{4, 6}, Result: []uint32{4, 6},
}, },
} }
g := new(DomainMatcherGroup)
for _, pattern := range patterns {
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
}
for _, testCase := range testCases { for _, testCase := range testCases {
r := g.Match(testCase.Domain) r := g.Match(testCase.Domain)
if !reflect.DeepEqual(r, testCase.Result) { if !reflect.DeepEqual(r, testCase.Result) {

View File

@ -0,0 +1,30 @@
package strmatcher
// FullMatcherGroup is a MatcherGroup implementation for exact-match patterns.
// Patterns are the keys of a hash table, so a lookup is a single map access.
// The zero value is ready to use.
type FullMatcherGroup struct {
	// matchers maps a pattern to the values registered for it, in insertion order.
	matchers map[string][]uint32
}

// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
//
// The value is appended to the list kept for the matcher's pattern; the
// underlying map is created on first use.
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
	if g.matchers == nil {
		g.matchers = map[string][]uint32{}
	}
	key := matcher.Pattern()
	bucket := g.matchers[key]
	g.matchers[key] = append(bucket, value)
}

// Match implements MatcherGroup.Match.
//
// It returns the values registered for exactly input, or nil when there are
// none.
func (g *FullMatcherGroup) Match(input string) []uint32 {
	// Indexing a nil map yields the zero value, so an empty group needs no
	// special case here.
	return g.matchers[input]
}

// MatchAny implements MatcherGroup.MatchAny.
//
// It reports whether at least one value is registered for exactly input.
func (g *FullMatcherGroup) MatchAny(input string) bool {
	return len(g.matchers[input]) != 0
}

View File

@ -8,13 +8,31 @@ import (
) )
func TestFullMatcherGroup(t *testing.T) { func TestFullMatcherGroup(t *testing.T) {
g := new(FullMatcherGroup) patterns := []struct {
g.Add("v2fly.org", 1) Pattern string
g.Add("google.com", 2) Value uint32
g.Add("x.a.com", 3) }{
g.Add("x.y.com", 4) {
g.Add("x.y.com", 6) Pattern: "v2fly.org",
Value: 1,
},
{
Pattern: "google.com",
Value: 2,
},
{
Pattern: "x.a.com",
Value: 3,
},
{
Pattern: "x.y.com",
Value: 4,
},
{
Pattern: "x.y.com",
Value: 6,
},
}
testCases := []struct { testCases := []struct {
Domain string Domain string
Result []uint32 Result []uint32
@ -32,7 +50,10 @@ func TestFullMatcherGroup(t *testing.T) {
Result: []uint32{4, 6}, Result: []uint32{4, 6},
}, },
} }
g := new(FullMatcherGroup)
for _, pattern := range patterns {
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
}
for _, testCase := range testCases { for _, testCase := range testCases {
r := g.Match(testCase.Domain) r := g.Match(testCase.Domain)
if !reflect.DeepEqual(r, testCase.Result) { if !reflect.DeepEqual(r, testCase.Result) {

View File

@ -2,7 +2,6 @@ package strmatcher
import ( import (
"math/bits" "math/bits"
"regexp"
"sort" "sort"
"strings" "strings"
"unsafe" "unsafe"
@ -20,79 +19,44 @@ func RollingHash(s string) uint32 {
return h return h
} }
// A MphMatcherGroup is divided into three parts: // MphMatcherGroup is an implementation of MatcherGroup.
// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table; // It implements Rabin-Karp algorithm and minimal perfect hash table for Full and Domain matcher.
// 2. `substr` patterns are matched by ac automaton;
// 3. `regex` patterns are matched with the regex library.
type MphMatcherGroup struct { type MphMatcherGroup struct {
ac *ACAutomaton
otherMatchers []matcherEntry
rules []string rules []string
level0 []uint32 level0 []uint32
level0Mask int level0Mask int
level1 []uint32 level1 []uint32
level1Mask int level1Mask int
count uint32
ruleMap *map[string]uint32 ruleMap *map[string]uint32
} }
func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
h := RollingHash(pattern)
switch t {
case Domain:
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
fallthrough
case Full:
(*g.ruleMap)[pattern] = h
default:
}
}
func NewMphMatcherGroup() *MphMatcherGroup { func NewMphMatcherGroup() *MphMatcherGroup {
return &MphMatcherGroup{ return &MphMatcherGroup{
ac: nil,
otherMatchers: nil,
rules: nil, rules: nil,
level0: nil, level0: nil,
level0Mask: 0, level0Mask: 0,
level1: nil, level1: nil,
level1Mask: 0, level1Mask: 0,
count: 1,
ruleMap: &map[string]uint32{}, ruleMap: &map[string]uint32{},
} }
} }
// AddPattern adds a pattern to MphMatcherGroup // AddFullMatcher implements MatcherGroupForFull.
func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) { func (g *MphMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
switch t { pattern := strings.ToLower(matcher.Pattern())
case Substr: (*g.ruleMap)[pattern] = RollingHash(pattern)
if g.ac == nil {
g.ac = NewACAutomaton()
}
g.ac.Add(pattern, t)
case Full, Domain:
pattern = strings.ToLower(pattern)
g.AddFullOrDomainPattern(pattern, t)
case Regex:
r, err := regexp.Compile(pattern)
if err != nil {
return 0, err
}
g.otherMatchers = append(g.otherMatchers, matcherEntry{
m: &regexMatcher{pattern: r},
id: g.count,
})
default:
panic("Unknown type")
}
return g.count, nil
} }
// Build builds a minimal perfect hash table and ac automaton from insert rules // AddDomainMatcher implements MatcherGroupForDomain.
func (g *MphMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
pattern := strings.ToLower(matcher.Pattern())
h := RollingHash(pattern)
(*g.ruleMap)[pattern] = h
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
}
// Build builds a minimal perfect hash table for insert rules.
func (g *MphMatcherGroup) Build() { func (g *MphMatcherGroup) Build() {
if g.ac != nil {
g.ac.Build()
}
keyLen := len(*g.ruleMap) keyLen := len(*g.ruleMap)
if keyLen == 0 { if keyLen == 0 {
keyLen = 1 keyLen = 1
@ -127,7 +91,7 @@ func (g *MphMatcherGroup) Build() {
findSeed := true findSeed := true
tmpOcc = tmpOcc[:0] tmpOcc = tmpOcc[:0]
for _, i := range bucket.vals { for _, i := range bucket.vals {
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask // nosemgrep
if occ[n] { if occ[n] {
for _, n := range tmpOcc { for _, n := range tmpOcc {
occ[n] = false occ[n] = false
@ -148,6 +112,34 @@ func (g *MphMatcherGroup) Build() {
} }
} }
// Lookup reports whether s is one of the rules stored in the hash table.
// h is the rolling hash of s: it selects the level-0 entry, which in turn
// seeds the second-level hash that selects the candidate rule.
// NOTE(review): level0, level1 and rules are nil in a freshly constructed
// group, so this presumably must only be called after Build — confirm.
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
	seed := g.level0[int(h)&g.level0Mask]
	slot := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask // nosemgrep
	// The hashes only pick a candidate; the string comparison decides membership.
	return g.rules[int(g.level1[slot])] == s
}
// Match implements MatcherGroup.Match.
// It always returns nil: AddFullMatcher and AddDomainMatcher discard the value
// passed with each matcher, so this group has no per-matcher values to report.
// Use MatchAny to test membership.
func (*MphMatcherGroup) Match(_ string) []uint32 {
	return nil
}
// MatchAny implements MatcherGroup.MatchAny.
// It walks the input from its last byte to its first, extending a rolling
// hash one byte at a time. Every suffix that starts with '.' is looked up
// (sub-domain match), and finally the whole input is looked up (exact match).
func (g *MphMatcherGroup) MatchAny(pattern string) bool {
	var h uint32
	for pos := len(pattern) - 1; pos >= 0; pos-- {
		c := pattern[pos]
		h = h*PrimeRK + uint32(c)
		// h now covers pattern[pos:], so it can be reused for the suffix lookup.
		if c == '.' && g.Lookup(h, pattern[pos:]) {
			return true
		}
	}
	return g.Lookup(h, pattern)
}
func nextPow2(v int) int { func nextPow2(v int) int {
if v <= 1 { if v <= 1 {
return 1 return 1
@ -157,45 +149,6 @@ func nextPow2(v int) int {
return int(n) return int(n)
} }
// Lookup searches for s in t and returns its index and whether it was found.
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
i0 := int(h) & g.level0Mask
seed := g.level0[i0]
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask
n := g.level1[i1]
return s == g.rules[int(n)]
}
// Match implements IndexMatcher.Match.
func (g *MphMatcherGroup) Match(pattern string) []uint32 {
result := []uint32{}
hash := uint32(0)
for i := len(pattern) - 1; i >= 0; i-- {
hash = hash*PrimeRK + uint32(pattern[i])
if pattern[i] == '.' {
if g.Lookup(hash, pattern[i:]) {
result = append(result, 1)
return result
}
}
}
if g.Lookup(hash, pattern) {
result = append(result, 1)
return result
}
if g.ac != nil && g.ac.Match(pattern) {
result = append(result, 1)
return result
}
for _, e := range g.otherMatchers {
if e.m.Match(pattern) {
result = append(result, e.id)
return result
}
}
return nil
}
type indexBucket struct { type indexBucket struct {
n int n int
vals []int vals []int
@ -286,7 +239,7 @@ tail:
} }
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + x) return unsafe.Pointer(uintptr(p) + x) // nosemgrep
} }
func readUnaligned32(p unsafe.Pointer) uint32 { func readUnaligned32(p unsafe.Pointer) uint32 {

View File

@ -0,0 +1,174 @@
package strmatcher_test
import (
"testing"
"github.com/v2fly/v2ray-core/v4/common"
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
)
// TestMphMatcherGroup exercises MphMatcherGroup.MatchAny with Full and Domain
// matchers: first one rule per group, then groups holding several rules.
func TestMphMatcherGroup(t *testing.T) {
	type rule struct {
		pattern string
		mType   Type
	}
	type probe struct {
		pattern string
		res     bool
	}

	// build returns a built group containing the given rules.
	build := func(rules ...rule) *MphMatcherGroup {
		group := NewMphMatcherGroup()
		for _, r := range rules {
			matcher, err := r.mType.New(r.pattern)
			common.Must(err)
			common.Must(AddMatcherToGroup(group, matcher, 0))
		}
		group.Build()
		return group
	}

	// verify reports every probe whose MatchAny result differs from the expectation.
	verify := func(group *MphMatcherGroup, probes []probe) {
		for _, p := range probes {
			if got := group.MatchAny(p.pattern); got != p.res {
				t.Error("unexpected output: ", got, " for test case ", p)
			}
		}
	}

	// One rule per group.
	single := []struct {
		pattern string
		mType   Type
		input   string
		output  bool
	}{
		{pattern: "v2fly.org", mType: Domain, input: "www.v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Domain, input: "v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Domain, input: "www.v3fly.org", output: false},
		{pattern: "v2fly.org", mType: Domain, input: "2fly.org", output: false},
		{pattern: "v2fly.org", mType: Domain, input: "xv2fly.org", output: false},
		{pattern: "v2fly.org", mType: Full, input: "v2fly.org", output: true},
		{pattern: "v2fly.org", mType: Full, input: "xv2fly.org", output: false},
	}
	for _, c := range single {
		group := build(rule{pattern: c.pattern, mType: c.mType})
		if got := group.MatchAny(c.input); got != c.output {
			t.Error("unexpected output: ", got, " for test case ", c)
		}
	}

	// Mixed Domain and Full rules sharing suffixes.
	verify(
		build(
			rule{pattern: "163.com", mType: Domain},
			rule{pattern: "m.126.com", mType: Full},
			rule{pattern: "3.com", mType: Full},
		),
		[]probe{
			{pattern: "126.com", res: false},
			{pattern: "m.163.com", res: true},
			{pattern: "mm163.com", res: false},
			{pattern: "m.126.com", res: true},
			{pattern: "163.com", res: true},
			{pattern: "63.com", res: false},
			{pattern: "oogle.com", res: false},
			{pattern: "vvgoogle.com", res: false},
		},
	)

	// A domain rule must not match an input that merely ends with its text.
	verify(
		build(
			rule{pattern: "video.google.com", mType: Domain},
			rule{pattern: "gle.com", mType: Domain},
		),
		[]probe{
			{pattern: "google.com", res: false},
		},
	)
}

View File

@ -0,0 +1,36 @@
package strmatcher
// matcherEntry pairs a Matcher with the value reported when it matches.
type matcherEntry struct {
	matcher Matcher // matcher is tested against each input.
	value   uint32  // value is what SimpleMatcherGroup.Match reports when matcher matches.
}
// SimpleMatcherGroup is an implementation of MatcherGroup.
// It simply stores all matchers in an array and sequentially matches them.
// The zero value is an empty group ready for use.
type SimpleMatcherGroup struct {
	matchers []matcherEntry // matchers holds the entries in the order they were added.
}
// AddMatcher implements MatcherGroupForAll.AddMatcher.
// The matcher is appended after all previously added ones, paired with value.
func (g *SimpleMatcherGroup) AddMatcher(matcher Matcher, value uint32) {
	entry := matcherEntry{matcher: matcher, value: value}
	g.matchers = append(g.matchers, entry)
}
// Match implements MatcherGroup.Match.
// It returns the values of all matching matchers in insertion order. The
// result is an empty, non-nil slice when nothing matches.
func (g *SimpleMatcherGroup) Match(input string) []uint32 {
	matched := make([]uint32, 0)
	for i := range g.matchers {
		if entry := &g.matchers[i]; entry.matcher.Match(input) {
			matched = append(matched, entry.value)
		}
	}
	return matched
}
// MatchAny implements MatcherGroup.MatchAny.
// It tries the matchers in insertion order and returns true at the first one
// that matches, without evaluating the remaining matchers or allocating a
// result slice. It returns false when no matcher matches.
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
	for _, e := range g.matchers {
		if e.matcher.Match(input) {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,69 @@
package strmatcher_test
import (
"reflect"
"testing"
"github.com/v2fly/v2ray-core/v4/common"
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
)
// TestSimpleMatcherGroup registers a Domain, a Full and a Regex matcher for
// the same pattern and checks which of their values Match reports per input.
func TestSimpleMatcherGroup(t *testing.T) {
	// The slice index of each rule doubles as the value it is registered with.
	rules := []struct {
		pattern string
		mType   Type
	}{
		{pattern: "v2fly.org", mType: Domain},
		{pattern: "v2fly.org", mType: Full},
		{pattern: "v2fly.org", mType: Regex},
	}
	group := &SimpleMatcherGroup{}
	for value, rule := range rules {
		matcher, err := rule.mType.New(rule.pattern)
		common.Must(err)
		common.Must(AddMatcherToGroup(group, matcher, uint32(value)))
	}

	expectations := []struct {
		input  string
		output []uint32
	}{
		{input: "www.v2fly.org", output: []uint32{0, 2}},
		{input: "v2fly.org", output: []uint32{0, 1, 2}},
		{input: "www.v3fly.org", output: []uint32{}},
		{input: "2fly.org", output: []uint32{}},
		{input: "xv2fly.org", output: []uint32{2}},
		// The '.' in the regex pattern matches any character.
		{input: "v2flyxorg", output: []uint32{2}},
	}
	for _, want := range expectations {
		got := group.Match(want.input)
		if reflect.DeepEqual(got, want.output) {
			continue
		}
		t.Error("unexpected output: ", got, " for test case ", want)
	}
}

View File

@ -0,0 +1,47 @@
package strmatcher
import (
"sort"
"strings"
)
// SubstrMatcherGroup is an implementation of MatcherGroup.
// It is simply implemented to comply with the priority specification of Substr matchers.
// The zero value is an empty group ready for use.
type SubstrMatcherGroup struct {
	patterns []string // patterns holds the substrings in insertion order.
	values   []uint32 // values[i] is the value registered together with patterns[i].
}
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
// The matcher's pattern and its value are appended to two parallel slices,
// so both end up at the same index.
func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint32) {
	pattern := matcher.Pattern()
	g.patterns, g.values = append(g.patterns, pattern), append(g.values, value)
}
// Match implements MatcherGroup.Match.
//
// It returns one value per occurrence of every pattern in input, ordered so that:
//  1. A pattern matched at a smaller position (meaning matched further) takes precedence.
//  2. When patterns match at the same position, the pattern with the smaller
//     index (meaning inserted earlier) takes precedence.
//
// Occurrences of one pattern are found by scanning backwards and do not overlap.
// An empty pattern is reported exactly once, at position len(input). The
// result is an empty, non-nil slice when nothing matches.
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
	// hit records one occurrence. Position and pattern index are kept as
	// separate ints rather than packed into 16 bits each, so neither can be
	// truncated for large groups or long inputs.
	type hit struct {
		pos int // byte offset of the occurrence in input
		idx int // index of the pattern in g.patterns
	}
	var hits []hit
	for idx, pattern := range g.patterns {
		if pattern == "" {
			// strings.LastIndex(s, "") always returns len(s), so the backward
			// scan below would never reach -1. Record the match once instead.
			hits = append(hits, hit{pos: len(input), idx: idx})
			continue
		}
		for pos := strings.LastIndex(input, pattern); pos != -1; pos = strings.LastIndex(input[:pos], pattern) {
			hits = append(hits, hit{pos: pos, idx: idx})
		}
	}
	// (pos, idx) pairs are unique, so the unstable sort is still deterministic.
	sort.Slice(hits, func(a, b int) bool {
		if hits[a].pos != hits[b].pos {
			return hits[a].pos < hits[b].pos
		}
		return hits[a].idx < hits[b].idx
	})
	result := make([]uint32, len(hits))
	for i, h := range hits {
		result[i] = g.values[h.idx]
	}
	return result
}
// MatchAny implements MatcherGroup.MatchAny.
// It reports whether input contains at least one of the stored patterns,
// stopping at the first pattern found.
func (g *SubstrMatcherGroup) MatchAny(input string) bool {
	for i := 0; i < len(g.patterns); i++ {
		if !strings.Contains(input, g.patterns[i]) {
			continue
		}
		return true
	}
	return false
}

View File

@ -0,0 +1,65 @@
package strmatcher_test
import (
"reflect"
"testing"
"github.com/v2fly/v2ray-core/v4/common"
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
)
// TestSubstrMatcherGroup checks the order in which SubstrMatcherGroup.Match
// reports values: earlier position in the input first, then earlier insertion.
func TestSubstrMatcherGroup(t *testing.T) {
	// The slice index of each rule doubles as the value it is registered with.
	// "apis" is registered twice on purpose (values 0 and 2).
	rules := []struct {
		pattern string
		mType   Type
	}{
		{pattern: "apis", mType: Substr},
		{pattern: "google", mType: Substr},
		{pattern: "apis", mType: Substr},
	}
	group := &SubstrMatcherGroup{}
	for value, rule := range rules {
		matcher, err := rule.mType.New(rule.pattern)
		common.Must(err)
		common.Must(AddMatcherToGroup(group, matcher, uint32(value)))
	}

	expectations := []struct {
		input  string
		output []uint32
	}{
		{input: "google.com", output: []uint32{1}},
		{input: "apis.com", output: []uint32{0, 2}},
		{input: "googleapis.com", output: []uint32{1, 0, 2}},
		{input: "fonts.googleapis.com", output: []uint32{1, 0, 2}},
		{input: "apis.googleapis.com", output: []uint32{0, 2, 1, 0, 2}},
	}
	for _, want := range expectations {
		got := group.Match(want.input)
		if reflect.DeepEqual(got, want.output) {
			continue
		}
		t.Error("unexpected output: ", got, " for test case ", want)
	}
}

View File

@ -1,52 +1,167 @@
package strmatcher package strmatcher
import ( import (
"errors"
"regexp" "regexp"
"strings" "strings"
) )
type fullMatcher string // FullMatcher is an implementation of Matcher.
type FullMatcher string
func (m fullMatcher) Match(s string) bool { func (FullMatcher) Type() Type {
return Full
}
// Pattern returns the string the input must be exactly equal to.
func (m FullMatcher) Pattern() string {
	return string(m)
}
// String returns the pattern prefixed with "full:".
func (m FullMatcher) String() string {
	return "full:" + m.Pattern()
}
func (m FullMatcher) Match(s string) bool {
return string(m) == s return string(m) == s
} }
func (m fullMatcher) String() string { // DomainMatcher is an implementation of Matcher.
return "full:" + string(m) type DomainMatcher string
func (DomainMatcher) Type() Type {
return Domain
} }
type substrMatcher string func (m DomainMatcher) Pattern() string {
return string(m)
func (m substrMatcher) Match(s string) bool {
return strings.Contains(s, string(m))
} }
func (m substrMatcher) String() string { func (m DomainMatcher) String() string {
return "keyword:" + string(m) return "domain:" + m.Pattern()
} }
type domainMatcher string func (m DomainMatcher) Match(s string) bool {
pattern := m.Pattern()
func (m domainMatcher) Match(s string) bool {
pattern := string(m)
if !strings.HasSuffix(s, pattern) { if !strings.HasSuffix(s, pattern) {
return false return false
} }
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.' return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
} }
func (m domainMatcher) String() string { // SubstrMatcher is an implementation of Matcher.
return "domain:" + string(m) type SubstrMatcher string
func (SubstrMatcher) Type() Type {
return Substr
} }
type regexMatcher struct { func (m SubstrMatcher) Pattern() string {
return string(m)
}
// String returns the pattern prefixed with "keyword:".
func (m SubstrMatcher) String() string {
	return "keyword:" + m.Pattern()
}
// Match reports whether s contains the pattern as a substring.
func (m SubstrMatcher) Match(s string) bool {
	return strings.Contains(s, m.Pattern())
}
// RegexMatcher is an implementation of Matcher.
type RegexMatcher struct {
pattern *regexp.Regexp pattern *regexp.Regexp
} }
func (m *regexMatcher) Match(s string) bool { func (*RegexMatcher) Type() Type {
return Regex
}
// Pattern returns the source text of the compiled regular expression.
func (m *RegexMatcher) Pattern() string {
	return m.pattern.String()
}
// String returns the pattern prefixed with "regexp:".
func (m *RegexMatcher) String() string {
	return "regexp:" + m.Pattern()
}
func (m *RegexMatcher) Match(s string) bool {
return m.pattern.MatchString(s) return m.pattern.MatchString(s)
} }
func (m *regexMatcher) String() string { // New creates a new Matcher based on the given pattern.
return "regexp:" + m.pattern.String() func (t Type) New(pattern string) (Matcher, error) {
switch t {
case Full:
return FullMatcher(pattern), nil
case Substr:
return SubstrMatcher(pattern), nil
case Domain:
return DomainMatcher(pattern), nil
case Regex: // 1. regex matching is case-sensitive
regex, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return &RegexMatcher{pattern: regex}, nil
default:
panic("Unknown type")
}
}
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
type MatcherGroupForAll interface {
	// AddMatcher adds a matcher of any type to the group, together with the
	// value supplied for it.
	AddMatcher(matcher Matcher, value uint32)
}
// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
type MatcherGroupForFull interface {
	// AddFullMatcher adds a FullMatcher to the group, together with the value
	// supplied for it.
	AddFullMatcher(matcher FullMatcher, value uint32)
}
// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
type MatcherGroupForDomain interface {
	// AddDomainMatcher adds a DomainMatcher to the group, together with the
	// value supplied for it.
	AddDomainMatcher(matcher DomainMatcher, value uint32)
}
// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
type MatcherGroupForSubstr interface {
	// AddSubstrMatcher adds a SubstrMatcher to the group, together with the
	// value supplied for it.
	AddSubstrMatcher(matcher SubstrMatcher, value uint32)
}
// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
type MatcherGroupForRegex interface {
	// AddRegexMatcher adds a RegexMatcher to the group, together with the
	// value supplied for it.
	AddRegexMatcher(matcher *RegexMatcher, value uint32)
}
// AddMatcherGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
// This function is provided to help writing code to test a MatcherGroup.
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
if g, ok := g.(MatcherGroupForAll); ok {
g.AddMatcher(matcher, value)
return nil
}
switch matcher := matcher.(type) {
case FullMatcher:
if g, ok := g.(MatcherGroupForFull); ok {
g.AddFullMatcher(matcher, value)
return nil
}
case DomainMatcher:
if g, ok := g.(MatcherGroupForDomain); ok {
g.AddDomainMatcher(matcher, value)
return nil
}
case SubstrMatcher:
if g, ok := g.(MatcherGroupForSubstr); ok {
g.AddSubstrMatcher(matcher, value)
return nil
}
case *RegexMatcher:
if g, ok := g.(MatcherGroupForRegex); ok {
g.AddRegexMatcher(matcher, value)
return nil
}
}
return errors.New("cannot add matcher to matcher group")
} }

View File

@ -71,172 +71,3 @@ func TestMatcher(t *testing.T) {
} }
} }
} }
func TestACAutomaton(t *testing.T) {
cases1 := []struct {
pattern string
mType Type
input string
output bool
}{
{
pattern: "v2fly.org",
mType: Domain,
input: "www.v2fly.org",
output: true,
},
{
pattern: "v2fly.org",
mType: Domain,
input: "v2fly.org",
output: true,
},
{
pattern: "v2fly.org",
mType: Domain,
input: "www.v3fly.org",
output: false,
},
{
pattern: "v2fly.org",
mType: Domain,
input: "2fly.org",
output: false,
},
{
pattern: "v2fly.org",
mType: Domain,
input: "xv2fly.org",
output: false,
},
{
pattern: "v2fly.org",
mType: Full,
input: "v2fly.org",
output: true,
},
{
pattern: "v2fly.org",
mType: Full,
input: "xv2fly.org",
output: false,
},
}
for _, test := range cases1 {
ac := NewACAutomaton()
ac.Add(test.pattern, test.mType)
ac.Build()
if m := ac.Match(test.input); m != test.output {
t.Error("unexpected output: ", m, " for test case ", test)
}
}
{
cases2Input := []struct {
pattern string
mType Type
}{
{
pattern: "163.com",
mType: Domain,
},
{
pattern: "m.126.com",
mType: Full,
},
{
pattern: "3.com",
mType: Full,
},
{
pattern: "google.com",
mType: Substr,
},
{
pattern: "vgoogle.com",
mType: Substr,
},
}
ac := NewACAutomaton()
for _, test := range cases2Input {
ac.Add(test.pattern, test.mType)
}
ac.Build()
cases2Output := []struct {
pattern string
res bool
}{
{
pattern: "126.com",
res: false,
},
{
pattern: "m.163.com",
res: true,
},
{
pattern: "mm163.com",
res: false,
},
{
pattern: "m.126.com",
res: true,
},
{
pattern: "163.com",
res: true,
},
{
pattern: "63.com",
res: false,
},
{
pattern: "oogle.com",
res: false,
},
{
pattern: "vvgoogle.com",
res: true,
},
}
for _, test := range cases2Output {
if m := ac.Match(test.pattern); m != test.res {
t.Error("unexpected output: ", m, " for test case ", test)
}
}
}
{
cases3Input := []struct {
pattern string
mType Type
}{
{
pattern: "video.google.com",
mType: Domain,
},
{
pattern: "gle.com",
mType: Domain,
},
}
ac := NewACAutomaton()
for _, test := range cases3Input {
ac.Add(test.pattern, test.mType)
}
ac.Build()
cases3Output := []struct {
pattern string
res bool
}{
{
pattern: "google.com",
res: false,
},
}
for _, test := range cases3Output {
if m := ac.Match(test.pattern); m != test.res {
t.Error("unexpected output: ", m, " for test case ", test)
}
}
}
}

View File

@ -1,107 +1,74 @@
package strmatcher package strmatcher
import (
"regexp"
)
// Matcher is the interface to determine a string matches a pattern.
type Matcher interface {
// Match returns true if the given string matches a predefined pattern.
Match(string) bool
String() string
}
// Type is the type of the matcher. // Type is the type of the matcher.
type Type byte type Type byte
const ( const (
// Full is the type of matcher that the input string must exactly equal to the pattern. // Full is the type of matcher that the input string must exactly equal to the pattern.
Full Type = iota Full Type = 0
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
Substr
// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern. // Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
Domain Domain Type = 1
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
Substr Type = 2
// Regex is the type of matcher that the input string must matches the regular-expression pattern. // Regex is the type of matcher that the input string must matches the regular-expression pattern.
Regex Regex Type = 3
) )
// New creates a new Matcher based on the given pattern. // Matcher is the interface to determine a string matches a pattern.
func (t Type) New(pattern string) (Matcher, error) { // * This is a basic matcher to represent a certain kind of match semantic(full, substr, domain or regex).
// 1. regex matching is case-sensitive type Matcher interface {
switch t { // Type returns the matcher's type.
case Full: Type() Type
return fullMatcher(pattern), nil
case Substr: // Pattern returns the matcher's raw string representation.
return substrMatcher(pattern), nil Pattern() string
case Domain:
return domainMatcher(pattern), nil // String returns a string representation of the matcher containing its type and pattern.
case Regex: String() string
r, err := regexp.Compile(pattern)
if err != nil { // Match returns true if the given string matches a predefined pattern.
return nil, err // * This method is seldom used for performance reason
} // and is generally taken over by their corresponding MatcherGroup.
return &regexMatcher{ Match(input string) bool
pattern: r,
}, nil
default:
panic("Unknown type")
}
} }
// IndexMatcher is the interface for matching with a group of matchers. // MatcherGroup is an advanced type of matcher to accept a bunch of basic Matchers (of certain type, not all matcher types).
type IndexMatcher interface { // For example:
// Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists. // * FullMatcherGroup accepts FullMatcher and uses a hash table to facilitate lookup.
// * DomainMatcherGroup accepts DomainMatcher and uses a trie to optimize both memory consumption and lookup speed.
type MatcherGroup interface {
// Match returns all matched matchers with their corresponding values.
Match(input string) []uint32 Match(input string) []uint32
// MatchAny returns true as soon as one matching matcher is found.
MatchAny(input string) bool
} }
type matcherEntry struct { // IndexMatcher is a general type of matcher that accepts all kinds of basic matchers.
m Matcher // It should:
id uint32 // * Accept all Matcher types with no exception.
} // * Optimize string matching with a combination of MatcherGroups.
// * Obey certain priority order specification when returning matched Matchers.
// MatcherGroup is an implementation of IndexMatcher. type IndexMatcher interface {
// Empty initialization works. // Size returns number of matchers added to IndexMatcher.
type MatcherGroup struct { Size() uint32
count uint32
fullMatcher FullMatcherGroup // Add adds a new Matcher to IndexMatcher, and returns its index. The index will never be 0.
domainMatcher DomainMatcherGroup Add(matcher Matcher) uint32
otherMatchers []matcherEntry
} // Build builds the IndexMatcher to be ready for matching.
Build() error
// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
func (g *MatcherGroup) Add(m Matcher) uint32 { // Match returns the indices of all matchers that matches the input.
g.count++ // * Empty array is returned if no such matcher exists.
c := g.count // * The order of returned matchers should follow priority specification.
// Priority specification:
switch tm := m.(type) { // 1. Priority between matcher types: full > domain > substr > regex.
case fullMatcher: // 2. Priority of same-priority matchers matching at same position: the early added takes precedence.
g.fullMatcher.addMatcher(tm, c) // 3. Priority of domain matchers matching at different levels: the further matched domain takes precedence.
case domainMatcher: // 4. Priority of substr matchers matching at different positions: the further matched substr takes precedence.
g.domainMatcher.addMatcher(tm, c) Match(input string) []uint32
default:
g.otherMatchers = append(g.otherMatchers, matcherEntry{ // MatchAny returns true as soon as one matching matcher is found.
m: m, MatchAny(input string) bool
id: c,
})
}
return c
}
// Match implements IndexMatcher.Match.
func (g *MatcherGroup) Match(pattern string) []uint32 {
result := []uint32{}
result = append(result, g.fullMatcher.Match(pattern)...)
result = append(result, g.domainMatcher.Match(pattern)...)
for _, e := range g.otherMatchers {
if e.m.Match(pattern) {
result = append(result, e.id)
}
}
return result
}
// Size returns the number of matchers in the MatcherGroup.
func (g *MatcherGroup) Size() uint32 {
return g.count
} }