mirror of
https://github.com/v2fly/v2ray-core.git
synced 2024-12-30 05:56:54 -05:00
Refactor: strmatcher module (#1333)
* Reorganize strmatcher's package structure * Rename types in strmatcher package according to their file names * Stabilize strmatcher's Matcher interface * Implement []matcherEntry as SimpleMatcherGroup * Implement mph algorithm extracted from MphIndexMatcher as MphMatcherGroup * Implement AddMatcher/AddFullMatcher/AddDomainMatcher/AddSubstrMatcher for each MatcherGroup * Stabilize strmatcher's MatcherGroup interface * Stabilize strmatcher's IndexMatcher interface * Update strmatcher's benchmark * Compatibility fix for app/router's DomainMatcher condition * Fix code quality issue * Fix basic matcher issues * Update priority specification for Substr matcher
This commit is contained in:
parent
a66bb28aee
commit
d4da365c5f
@ -98,7 +98,7 @@ func New(ctx context.Context, config *Config) (*DNS, error) {
|
||||
|
||||
// MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1
|
||||
matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1)
|
||||
domainMatcher := &strmatcher.MatcherGroup{}
|
||||
domainMatcher := &strmatcher.LinearIndexMatcher{}
|
||||
geoipContainer := router.GeoIPMatcherContainer{}
|
||||
|
||||
for _, endpoint := range config.NameServers {
|
||||
|
@ -11,12 +11,12 @@ import (
|
||||
// StaticHosts represents static domain-ip mapping in DNS server.
|
||||
type StaticHosts struct {
|
||||
ips [][]net.Address
|
||||
matchers *strmatcher.MatcherGroup
|
||||
matchers *strmatcher.LinearIndexMatcher
|
||||
}
|
||||
|
||||
// NewStaticHosts creates a new StaticHosts instance.
|
||||
func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) {
|
||||
g := new(strmatcher.MatcherGroup)
|
||||
g := new(strmatcher.LinearIndexMatcher)
|
||||
sh := &StaticHosts{
|
||||
ips: make([][]net.Address, len(hosts)+len(legacy)+16),
|
||||
matchers: g,
|
||||
|
@ -64,44 +64,34 @@ func domainToMatcher(domain *routercommon.Domain) (strmatcher.Matcher, error) {
|
||||
}
|
||||
|
||||
type DomainMatcher struct {
|
||||
matchers strmatcher.IndexMatcher
|
||||
matcher strmatcher.IndexMatcher
|
||||
}
|
||||
|
||||
func NewMphMatcherGroup(domains []*routercommon.Domain) (*DomainMatcher, error) {
|
||||
g := strmatcher.NewMphMatcherGroup()
|
||||
for _, d := range domains {
|
||||
matcherType, f := matcherTypeMap[d.Type]
|
||||
if !f {
|
||||
return nil, newError("unsupported domain type", d.Type)
|
||||
}
|
||||
_, err := g.AddPattern(d.Value, matcherType)
|
||||
func NewDomainMatcher(matcherType string, domains []*routercommon.Domain) (*DomainMatcher, error) {
|
||||
var indexMatcher strmatcher.IndexMatcher
|
||||
switch matcherType {
|
||||
case "mph", "hybrid":
|
||||
indexMatcher = strmatcher.NewMphIndexMatcher()
|
||||
case "linear":
|
||||
indexMatcher = strmatcher.NewLinearIndexMatcher()
|
||||
default:
|
||||
indexMatcher = strmatcher.NewLinearIndexMatcher()
|
||||
}
|
||||
for _, domain := range domains {
|
||||
matcher, err := domainToMatcher(domain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
indexMatcher.Add(matcher)
|
||||
}
|
||||
g.Build()
|
||||
return &DomainMatcher{
|
||||
matchers: g,
|
||||
}, nil
|
||||
if err := indexMatcher.Build(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &DomainMatcher{matcher: indexMatcher}, nil
|
||||
}
|
||||
|
||||
func NewDomainMatcher(domains []*routercommon.Domain) (*DomainMatcher, error) {
|
||||
g := new(strmatcher.MatcherGroup)
|
||||
for _, d := range domains {
|
||||
m, err := domainToMatcher(d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
g.Add(m)
|
||||
}
|
||||
|
||||
return &DomainMatcher{
|
||||
matchers: g,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *DomainMatcher) ApplyDomain(domain string) bool {
|
||||
return len(m.matchers.Match(strings.ToLower(domain))) > 0
|
||||
func (m *DomainMatcher) Match(domain string) bool {
|
||||
return m.matcher.MatchAny(domain)
|
||||
}
|
||||
|
||||
// Apply implements Condition.
|
||||
@ -110,7 +100,7 @@ func (m *DomainMatcher) Apply(ctx routing.Context) bool {
|
||||
if len(domain) == 0 {
|
||||
return false
|
||||
}
|
||||
return m.ApplyDomain(domain)
|
||||
return m.Match(domain)
|
||||
}
|
||||
|
||||
type MultiGeoIPMatcher struct {
|
||||
|
@ -375,9 +375,9 @@ func TestChinaSites(t *testing.T) {
|
||||
domains, err := loadGeoSite("CN")
|
||||
common.Must(err)
|
||||
|
||||
matcher, err := router.NewDomainMatcher(domains)
|
||||
matcher, err := router.NewDomainMatcher("linear", domains)
|
||||
common.Must(err)
|
||||
acMatcher, err := router.NewMphMatcherGroup(domains)
|
||||
mphMatcher, err := router.NewDomainMatcher("mph", domains)
|
||||
common.Must(err)
|
||||
|
||||
type TestCase struct {
|
||||
@ -408,8 +408,8 @@ func TestChinaSites(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
r1 := matcher.ApplyDomain(testCase.Domain)
|
||||
r2 := acMatcher.ApplyDomain(testCase.Domain)
|
||||
r1 := matcher.Match(testCase.Domain)
|
||||
r2 := mphMatcher.Match(testCase.Domain)
|
||||
if r1 != testCase.Output {
|
||||
t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1)
|
||||
} else if r2 != testCase.Output {
|
||||
@ -422,7 +422,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
|
||||
domains, err := loadGeoSite("CN")
|
||||
common.Must(err)
|
||||
|
||||
matcher, err := router.NewMphMatcherGroup(domains)
|
||||
matcher, err := router.NewDomainMatcher("mph", domains)
|
||||
common.Must(err)
|
||||
|
||||
type TestCase struct {
|
||||
@ -455,7 +455,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, testCase := range testCases {
|
||||
_ = matcher.ApplyDomain(testCase.Domain)
|
||||
_ = matcher.Match(testCase.Domain)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -464,7 +464,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
|
||||
domains, err := loadGeoSite("CN")
|
||||
common.Must(err)
|
||||
|
||||
matcher, err := router.NewDomainMatcher(domains)
|
||||
matcher, err := router.NewDomainMatcher("linear", domains)
|
||||
common.Must(err)
|
||||
|
||||
type TestCase struct {
|
||||
@ -497,7 +497,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, testCase := range testCases {
|
||||
_ = matcher.ApplyDomain(testCase.Domain)
|
||||
_ = matcher.Match(testCase.Domain)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,23 +39,11 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) {
|
||||
conds := NewConditionChan()
|
||||
|
||||
if len(rr.Domain) > 0 {
|
||||
switch rr.DomainMatcher {
|
||||
case "mph", "hybrid":
|
||||
matcher, err := NewMphMatcherGroup(rr.Domain)
|
||||
if err != nil {
|
||||
return nil, newError("failed to build domain condition with MphDomainMatcher").Base(err)
|
||||
}
|
||||
newError("MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)").AtDebug().WriteToLog()
|
||||
conds.Add(matcher)
|
||||
case "linear":
|
||||
fallthrough
|
||||
default:
|
||||
matcher, err := NewDomainMatcher(rr.Domain)
|
||||
if err != nil {
|
||||
return nil, newError("failed to build domain condition").Base(err)
|
||||
}
|
||||
conds.Add(matcher)
|
||||
cond, err := NewDomainMatcher(rr.DomainMatcher, rr.Domain)
|
||||
if err != nil {
|
||||
return nil, newError("failed to build domain condition").Base(err)
|
||||
}
|
||||
conds.Add(cond)
|
||||
}
|
||||
|
||||
if len(rr.UserEmail) > 0 {
|
||||
|
@ -49,7 +49,7 @@ func (s *statsServer) GetStats(ctx context.Context, request *GetStatsRequest) (*
|
||||
}
|
||||
|
||||
func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) {
|
||||
mgroup := &strmatcher.MatcherGroup{}
|
||||
mgroup := &strmatcher.LinearIndexMatcher{}
|
||||
if request.Pattern != "" {
|
||||
request.Patterns = append(request.Patterns, request.Pattern)
|
||||
}
|
||||
|
@ -8,16 +8,18 @@ import (
|
||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||
)
|
||||
|
||||
func BenchmarkACAutomaton(b *testing.B) {
|
||||
ac := NewACAutomaton()
|
||||
// Benchmark Domain Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
ac.Add(strconv.Itoa(i)+".v2fly.org", Domain)
|
||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.Match("0.v2fly.org")
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,7 +27,48 @@ func BenchmarkDomainMatcherGroup(b *testing.B) {
|
||||
g := new(DomainMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
|
||||
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
|
||||
mph := NewMphMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
mph.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = mph.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Full Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
@ -38,7 +81,7 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
|
||||
g := new(FullMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
|
||||
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
@ -47,8 +90,64 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMarchGroup(b *testing.B) {
|
||||
g := new(MatcherGroup)
|
||||
func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMphMatcherGroupFull(b *testing.B) {
|
||||
mph := NewMphMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
mph.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = mph.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Substr Matcher Groups
|
||||
|
||||
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
|
||||
g := new(SimpleMatcherGroup)
|
||||
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = g.Match("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for i := 1; i <= 1024; i++ {
|
||||
AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||
}
|
||||
ac.Build()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ac.MatchAny("0.v2fly.org")
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark Index Matchers
|
||||
|
||||
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
||||
g := new(LinearIndexMatcher)
|
||||
for i := 1; i <= 1024; i++ {
|
||||
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
|
||||
common.Must(err)
|
||||
|
@ -1,25 +0,0 @@
|
||||
package strmatcher
|
||||
|
||||
type FullMatcherGroup struct {
|
||||
matchers map[string][]uint32
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) Add(domain string, value uint32) {
|
||||
if g.matchers == nil {
|
||||
g.matchers = make(map[string][]uint32)
|
||||
}
|
||||
|
||||
g.matchers[domain] = append(g.matchers[domain], value)
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) {
|
||||
g.Add(string(m), value)
|
||||
}
|
||||
|
||||
func (g *FullMatcherGroup) Match(str string) []uint32 {
|
||||
if g.matchers == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return g.matchers[str]
|
||||
}
|
59
common/strmatcher/indexmatcher_linear.go
Normal file
59
common/strmatcher/indexmatcher_linear.go
Normal file
@ -0,0 +1,59 @@
|
||||
package strmatcher
|
||||
|
||||
// LinearIndexMatcher is an implementation of IndexMatcher.
|
||||
// Empty initialization works.
|
||||
type LinearIndexMatcher struct {
|
||||
count uint32
|
||||
fullMatcher FullMatcherGroup
|
||||
domainMatcher DomainMatcherGroup
|
||||
substrMatcher SubstrMatcherGroup
|
||||
otherMatchers SimpleMatcherGroup
|
||||
}
|
||||
|
||||
func NewLinearIndexMatcher() *LinearIndexMatcher {
|
||||
return new(LinearIndexMatcher)
|
||||
}
|
||||
|
||||
// Add implements IndexMatcher.Add.
|
||||
func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
|
||||
g.count++
|
||||
index := g.count
|
||||
|
||||
switch matcher := matcher.(type) {
|
||||
case FullMatcher:
|
||||
g.fullMatcher.AddFullMatcher(matcher, index)
|
||||
case DomainMatcher:
|
||||
g.domainMatcher.AddDomainMatcher(matcher, index)
|
||||
case SubstrMatcher:
|
||||
g.substrMatcher.AddSubstrMatcher(matcher, index)
|
||||
default:
|
||||
g.otherMatchers.AddMatcher(matcher, index)
|
||||
}
|
||||
|
||||
return index
|
||||
}
|
||||
|
||||
// Build implements IndexMatcher.Build.
|
||||
func (*LinearIndexMatcher) Build() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *LinearIndexMatcher) Match(input string) []uint32 {
|
||||
result := []uint32{}
|
||||
result = append(result, g.fullMatcher.Match(input)...)
|
||||
result = append(result, g.domainMatcher.Match(input)...)
|
||||
result = append(result, g.substrMatcher.Match(input)...)
|
||||
result = append(result, g.otherMatchers.Match(input)...)
|
||||
return result
|
||||
}
|
||||
|
||||
// MatchAny implements IndexMatcher.MatchAny.
|
||||
func (g *LinearIndexMatcher) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
}
|
||||
|
||||
// Size implements IndexMatcher.Size.
|
||||
func (g *LinearIndexMatcher) Size() uint32 {
|
||||
return g.count
|
||||
}
|
@ -9,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
// See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489
|
||||
func TestMatcherGroup(t *testing.T) {
|
||||
func TestLinearIndexMatcher(t *testing.T) {
|
||||
rules := []struct {
|
||||
Type Type
|
||||
Domain string
|
||||
@ -73,19 +73,20 @@ func TestMatcherGroup(t *testing.T) {
|
||||
},
|
||||
{
|
||||
Input: "testapis.us",
|
||||
Output: []uint32{1, 2, 6},
|
||||
Output: []uint32{2, 6, 1},
|
||||
},
|
||||
{
|
||||
Input: "example.com",
|
||||
Output: []uint32{10, 4},
|
||||
},
|
||||
}
|
||||
matcherGroup := &MatcherGroup{}
|
||||
matcherGroup := NewLinearIndexMatcher()
|
||||
for _, rule := range rules {
|
||||
matcher, err := rule.Type.New(rule.Domain)
|
||||
common.Must(err)
|
||||
matcherGroup.Add(matcher)
|
||||
}
|
||||
matcherGroup.Build()
|
||||
for _, test := range cases {
|
||||
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
80
common/strmatcher/indexmatcher_mph.go
Normal file
80
common/strmatcher/indexmatcher_mph.go
Normal file
@ -0,0 +1,80 @@
|
||||
package strmatcher
|
||||
|
||||
// A MphIndexMatcher is divided into three parts:
|
||||
// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
|
||||
// 2. `substr` patterns are matched by ac automaton;
|
||||
// 3. `regex` patterns are matched with the regex library.
|
||||
type MphIndexMatcher struct {
|
||||
count uint32
|
||||
mph *MphMatcherGroup
|
||||
ac *ACAutomatonMatcherGroup
|
||||
regex SimpleMatcherGroup
|
||||
}
|
||||
|
||||
func NewMphIndexMatcher() *MphIndexMatcher {
|
||||
return &MphIndexMatcher{
|
||||
mph: nil,
|
||||
ac: nil,
|
||||
regex: SimpleMatcherGroup{},
|
||||
}
|
||||
}
|
||||
|
||||
// Add implements IndexMatcher.Add.
|
||||
func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
|
||||
g.count++
|
||||
index := g.count
|
||||
|
||||
switch matcher := matcher.(type) {
|
||||
case FullMatcher:
|
||||
if g.mph == nil {
|
||||
g.mph = NewMphMatcherGroup()
|
||||
}
|
||||
g.mph.AddFullMatcher(matcher, index)
|
||||
case DomainMatcher:
|
||||
if g.mph == nil {
|
||||
g.mph = NewMphMatcherGroup()
|
||||
}
|
||||
g.mph.AddDomainMatcher(matcher, index)
|
||||
case SubstrMatcher:
|
||||
if g.ac == nil {
|
||||
g.ac = NewACAutomatonMatcherGroup()
|
||||
}
|
||||
g.ac.AddSubstrMatcher(matcher, index)
|
||||
case *RegexMatcher:
|
||||
g.regex.AddMatcher(matcher, index)
|
||||
}
|
||||
|
||||
return index
|
||||
}
|
||||
|
||||
// Build implements IndexMatcher.Build.
|
||||
func (g *MphIndexMatcher) Build() error {
|
||||
if g.mph != nil {
|
||||
g.mph.Build()
|
||||
}
|
||||
if g.ac != nil {
|
||||
g.ac.Build()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (*MphIndexMatcher) Match(string) []uint32 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MatchAny implements IndexMatcher.MatchAny.
|
||||
func (g *MphIndexMatcher) MatchAny(input string) bool {
|
||||
if g.mph != nil && g.mph.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
if g.ac != nil && g.ac.MatchAny(input) {
|
||||
return true
|
||||
}
|
||||
return g.regex.MatchAny(input)
|
||||
}
|
||||
|
||||
// Size implements IndexMatcher.Size.
|
||||
func (g *MphIndexMatcher) Size() uint32 {
|
||||
return g.count
|
||||
}
|
@ -21,7 +21,9 @@ type Edge struct {
|
||||
nextNode int
|
||||
}
|
||||
|
||||
type ACAutomaton struct {
|
||||
// ACAutomatonMatcherGroup is an implementation of MatcherGroup.
|
||||
// It uses an AC automaton to provide support for Full, Domain and Substr matchers. Trie nodes are character based.
|
||||
type ACAutomatonMatcherGroup struct {
|
||||
trie [][validCharCount]Edge
|
||||
fail []int
|
||||
exists []MatchType
|
||||
@ -121,8 +123,8 @@ var char2Index = []int{
|
||||
'9': 52,
|
||||
}
|
||||
|
||||
func NewACAutomaton() *ACAutomaton {
|
||||
ac := new(ACAutomaton)
|
||||
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
|
||||
ac := new(ACAutomatonMatcherGroup)
|
||||
ac.trie = append(ac.trie, newNode())
|
||||
ac.fail = append(ac.fail, 0)
|
||||
ac.exists = append(ac.exists, MatchType{
|
||||
@ -132,10 +134,25 @@ func NewACAutomaton() *ACAutomaton {
|
||||
return ac
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Add(domain string, t Type) {
|
||||
node := 0
|
||||
for i := len(domain) - 1; i >= 0; i-- {
|
||||
idx := char2Index[domain[i]]
|
||||
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||
func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
|
||||
ac.addPattern(0, matcher.Pattern(), matcher.Type())
|
||||
}
|
||||
|
||||
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||
func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
|
||||
node := ac.addPattern(0, matcher.Pattern(), Full)
|
||||
ac.addPattern(node, ".", Domain)
|
||||
}
|
||||
|
||||
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
|
||||
func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) {
|
||||
ac.addPattern(0, matcher.Pattern(), matcher.Type())
|
||||
}
|
||||
|
||||
func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int {
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
idx := char2Index[pattern[i]]
|
||||
if ac.trie[node][idx].nextNode == 0 {
|
||||
ac.count++
|
||||
if len(ac.trie) < ac.count+1 {
|
||||
@ -154,42 +171,13 @@ func (ac *ACAutomaton) Add(domain string, t Type) {
|
||||
node = ac.trie[node][idx].nextNode
|
||||
}
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: t,
|
||||
matchType: matcherType,
|
||||
exist: true,
|
||||
}
|
||||
switch t {
|
||||
case Domain:
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: Full,
|
||||
exist: true,
|
||||
}
|
||||
idx := char2Index['.']
|
||||
if ac.trie[node][idx].nextNode == 0 {
|
||||
ac.count++
|
||||
if len(ac.trie) < ac.count+1 {
|
||||
ac.trie = append(ac.trie, newNode())
|
||||
ac.fail = append(ac.fail, 0)
|
||||
ac.exists = append(ac.exists, MatchType{
|
||||
matchType: Full,
|
||||
exist: false,
|
||||
})
|
||||
}
|
||||
ac.trie[node][idx] = Edge{
|
||||
edgeType: TrieEdge,
|
||||
nextNode: ac.count,
|
||||
}
|
||||
}
|
||||
node = ac.trie[node][idx].nextNode
|
||||
ac.exists[node] = MatchType{
|
||||
matchType: t,
|
||||
exist: true,
|
||||
}
|
||||
default:
|
||||
break
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Build() {
|
||||
func (ac *ACAutomatonMatcherGroup) Build() {
|
||||
queue := list.New()
|
||||
for i := 0; i < validCharCount; i++ {
|
||||
if ac.trie[0][i].nextNode != 0 {
|
||||
@ -218,7 +206,13 @@ func (ac *ACAutomaton) Build() {
|
||||
}
|
||||
}
|
||||
|
||||
func (ac *ACAutomaton) Match(s string) bool {
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool {
|
||||
node := 0
|
||||
fullMatch := true
|
||||
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
183
common/strmatcher/matchergroup_ac_automation_test.go
Normal file
183
common/strmatcher/matchergroup_ac_automation_test.go
Normal file
@ -0,0 +1,183 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v4/common"
|
||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||
)
|
||||
|
||||
func TestACAutomatonMatcherGroup(t *testing.T) {
|
||||
cases1 := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
input string
|
||||
output bool
|
||||
}{
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v3fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases1 {
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||
ac.Build()
|
||||
if m := ac.MatchAny(test.input); m != test.output {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
{
|
||||
cases2Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "163.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "3.com",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "google.com",
|
||||
mType: Substr,
|
||||
},
|
||||
{
|
||||
pattern: "vgoogle.com",
|
||||
mType: Substr,
|
||||
},
|
||||
}
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for _, test := range cases2Input {
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||
}
|
||||
ac.Build()
|
||||
cases2Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "126.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "mm163.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "63.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "oogle.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "vvgoogle.com",
|
||||
res: true,
|
||||
},
|
||||
}
|
||||
for _, test := range cases2Output {
|
||||
if m := ac.MatchAny(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
cases3Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "video.google.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "gle.com",
|
||||
mType: Domain,
|
||||
},
|
||||
}
|
||||
ac := NewACAutomatonMatcherGroup()
|
||||
for _, test := range cases3Input {
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||
}
|
||||
ac.Build()
|
||||
cases3Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "google.com",
|
||||
res: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases3Output {
|
||||
if m := ac.MatchAny(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -11,19 +11,20 @@ type node struct {
|
||||
sub map[string]*node
|
||||
}
|
||||
|
||||
// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers.
|
||||
// Visible for testing only.
|
||||
// DomainMatcherGroup is an implementation of MatcherGroup.
|
||||
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
|
||||
type DomainMatcherGroup struct {
|
||||
root *node
|
||||
}
|
||||
|
||||
func (g *DomainMatcherGroup) Add(domain string, value uint32) {
|
||||
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
|
||||
if g.root == nil {
|
||||
g.root = new(node)
|
||||
}
|
||||
|
||||
current := g.root
|
||||
parts := breakDomain(domain)
|
||||
parts := breakDomain(matcher.Pattern())
|
||||
for i := len(parts) - 1; i >= 0; i-- {
|
||||
part := parts[i]
|
||||
if current.sub == nil {
|
||||
@ -40,10 +41,7 @@ func (g *DomainMatcherGroup) Add(domain string, value uint32) {
|
||||
current.values = append(current.values, value)
|
||||
}
|
||||
|
||||
func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) {
|
||||
g.Add(string(m), value)
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
||||
if domain == "" {
|
||||
return nil
|
||||
@ -96,3 +94,8 @@ func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *DomainMatcherGroup) MatchAny(domain string) bool {
|
||||
return len(g.Match(domain)) > 0
|
||||
}
|
@ -8,15 +8,39 @@ import (
|
||||
)
|
||||
|
||||
func TestDomainMatcherGroup(t *testing.T) {
|
||||
g := new(DomainMatcherGroup)
|
||||
g.Add("v2fly.org", 1)
|
||||
g.Add("google.com", 2)
|
||||
g.Add("x.a.com", 3)
|
||||
g.Add("a.b.com", 4)
|
||||
g.Add("c.a.b.com", 5)
|
||||
g.Add("x.y.com", 4)
|
||||
g.Add("x.y.com", 6)
|
||||
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Value uint32
|
||||
}{
|
||||
{
|
||||
Pattern: "v2fly.org",
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Pattern: "google.com",
|
||||
Value: 2,
|
||||
},
|
||||
{
|
||||
Pattern: "x.a.com",
|
||||
Value: 3,
|
||||
},
|
||||
{
|
||||
Pattern: "a.b.com",
|
||||
Value: 4,
|
||||
},
|
||||
{
|
||||
Pattern: "c.a.b.com",
|
||||
Value: 5,
|
||||
},
|
||||
{
|
||||
Pattern: "x.y.com",
|
||||
Value: 4,
|
||||
},
|
||||
{
|
||||
Pattern: "x.y.com",
|
||||
Value: 6,
|
||||
},
|
||||
}
|
||||
testCases := []struct {
|
||||
Domain string
|
||||
Result []uint32
|
||||
@ -58,7 +82,10 @@ func TestDomainMatcherGroup(t *testing.T) {
|
||||
Result: []uint32{4, 6},
|
||||
},
|
||||
}
|
||||
|
||||
g := new(DomainMatcherGroup)
|
||||
for _, pattern := range patterns {
|
||||
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
r := g.Match(testCase.Domain)
|
||||
if !reflect.DeepEqual(r, testCase.Result) {
|
30
common/strmatcher/matchergroup_full.go
Normal file
30
common/strmatcher/matchergroup_full.go
Normal file
@ -0,0 +1,30 @@
|
||||
package strmatcher
|
||||
|
||||
// FullMatcherGroup is an implementation of MatcherGroup.
// It keeps a hash table keyed by pattern so an exact match is a single lookup.
// The zero value is ready to use: the table is allocated on the first add.
type FullMatcherGroup struct {
	// matchers maps each pattern to the values registered for it, in the
	// order they were added.
	matchers map[string][]uint32
}
|
||||
|
||||
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
||||
if g.matchers == nil {
|
||||
g.matchers = make(map[string][]uint32)
|
||||
}
|
||||
|
||||
domain := matcher.Pattern()
|
||||
g.matchers[domain] = append(g.matchers[domain], value)
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *FullMatcherGroup) Match(input string) []uint32 {
|
||||
if g.matchers == nil {
|
||||
return nil
|
||||
}
|
||||
return g.matchers[input]
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.Any.
|
||||
func (g *FullMatcherGroup) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
}
|
@ -8,13 +8,31 @@ import (
|
||||
)
|
||||
|
||||
func TestFullMatcherGroup(t *testing.T) {
|
||||
g := new(FullMatcherGroup)
|
||||
g.Add("v2fly.org", 1)
|
||||
g.Add("google.com", 2)
|
||||
g.Add("x.a.com", 3)
|
||||
g.Add("x.y.com", 4)
|
||||
g.Add("x.y.com", 6)
|
||||
|
||||
patterns := []struct {
|
||||
Pattern string
|
||||
Value uint32
|
||||
}{
|
||||
{
|
||||
Pattern: "v2fly.org",
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Pattern: "google.com",
|
||||
Value: 2,
|
||||
},
|
||||
{
|
||||
Pattern: "x.a.com",
|
||||
Value: 3,
|
||||
},
|
||||
{
|
||||
Pattern: "x.y.com",
|
||||
Value: 4,
|
||||
},
|
||||
{
|
||||
Pattern: "x.y.com",
|
||||
Value: 6,
|
||||
},
|
||||
}
|
||||
testCases := []struct {
|
||||
Domain string
|
||||
Result []uint32
|
||||
@ -32,7 +50,10 @@ func TestFullMatcherGroup(t *testing.T) {
|
||||
Result: []uint32{4, 6},
|
||||
},
|
||||
}
|
||||
|
||||
g := new(FullMatcherGroup)
|
||||
for _, pattern := range patterns {
|
||||
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
r := g.Match(testCase.Domain)
|
||||
if !reflect.DeepEqual(r, testCase.Result) {
|
@ -2,7 +2,6 @@ package strmatcher
|
||||
|
||||
import (
|
||||
"math/bits"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unsafe"
|
||||
@ -20,79 +19,44 @@ func RollingHash(s string) uint32 {
|
||||
return h
|
||||
}
|
||||
|
||||
// A MphMatcherGroup is divided into three parts:
|
||||
// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
|
||||
// 2. `substr` patterns are matched by ac automaton;
|
||||
// 3. `regex` patterns are matched with the regex library.
|
||||
// MphMatcherGroup is an implementation of MatcherGroup.
|
||||
// It implements Rabin-Karp algorithm and minimal perfect hash table for Full and Domain matcher.
|
||||
type MphMatcherGroup struct {
|
||||
ac *ACAutomaton
|
||||
otherMatchers []matcherEntry
|
||||
rules []string
|
||||
level0 []uint32
|
||||
level0Mask int
|
||||
level1 []uint32
|
||||
level1Mask int
|
||||
count uint32
|
||||
ruleMap *map[string]uint32
|
||||
}
|
||||
|
||||
func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
|
||||
h := RollingHash(pattern)
|
||||
switch t {
|
||||
case Domain:
|
||||
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
|
||||
fallthrough
|
||||
case Full:
|
||||
(*g.ruleMap)[pattern] = h
|
||||
default:
|
||||
}
|
||||
rules []string
|
||||
level0 []uint32
|
||||
level0Mask int
|
||||
level1 []uint32
|
||||
level1Mask int
|
||||
ruleMap *map[string]uint32
|
||||
}
|
||||
|
||||
func NewMphMatcherGroup() *MphMatcherGroup {
|
||||
return &MphMatcherGroup{
|
||||
ac: nil,
|
||||
otherMatchers: nil,
|
||||
rules: nil,
|
||||
level0: nil,
|
||||
level0Mask: 0,
|
||||
level1: nil,
|
||||
level1Mask: 0,
|
||||
count: 1,
|
||||
ruleMap: &map[string]uint32{},
|
||||
rules: nil,
|
||||
level0: nil,
|
||||
level0Mask: 0,
|
||||
level1: nil,
|
||||
level1Mask: 0,
|
||||
ruleMap: &map[string]uint32{},
|
||||
}
|
||||
}
|
||||
|
||||
// AddPattern adds a pattern to MphMatcherGroup
|
||||
func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
||||
switch t {
|
||||
case Substr:
|
||||
if g.ac == nil {
|
||||
g.ac = NewACAutomaton()
|
||||
}
|
||||
g.ac.Add(pattern, t)
|
||||
case Full, Domain:
|
||||
pattern = strings.ToLower(pattern)
|
||||
g.AddFullOrDomainPattern(pattern, t)
|
||||
case Regex:
|
||||
r, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
||||
m: ®exMatcher{pattern: r},
|
||||
id: g.count,
|
||||
})
|
||||
default:
|
||||
panic("Unknown type")
|
||||
}
|
||||
return g.count, nil
|
||||
// AddFullMatcher implements MatcherGroupForFull.
|
||||
func (g *MphMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
|
||||
pattern := strings.ToLower(matcher.Pattern())
|
||||
(*g.ruleMap)[pattern] = RollingHash(pattern)
|
||||
}
|
||||
|
||||
// Build builds a minimal perfect hash table and ac automaton from insert rules
|
||||
// AddDomainMatcher implements MatcherGroupForDomain.
|
||||
func (g *MphMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
|
||||
pattern := strings.ToLower(matcher.Pattern())
|
||||
h := RollingHash(pattern)
|
||||
(*g.ruleMap)[pattern] = h
|
||||
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
|
||||
}
|
||||
|
||||
// Build builds a minimal perfect hash table from the inserted rules.
|
||||
func (g *MphMatcherGroup) Build() {
|
||||
if g.ac != nil {
|
||||
g.ac.Build()
|
||||
}
|
||||
keyLen := len(*g.ruleMap)
|
||||
if keyLen == 0 {
|
||||
keyLen = 1
|
||||
@ -127,7 +91,7 @@ func (g *MphMatcherGroup) Build() {
|
||||
findSeed := true
|
||||
tmpOcc = tmpOcc[:0]
|
||||
for _, i := range bucket.vals {
|
||||
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask
|
||||
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask // nosemgrep
|
||||
if occ[n] {
|
||||
for _, n := range tmpOcc {
|
||||
occ[n] = false
|
||||
@ -148,6 +112,34 @@ func (g *MphMatcherGroup) Build() {
|
||||
}
|
||||
}
|
||||
|
||||
// Lookup searches for s in t and returns its index and whether it was found.
|
||||
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
|
||||
i0 := int(h) & g.level0Mask
|
||||
seed := g.level0[i0]
|
||||
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask // nosemgrep
|
||||
n := g.level1[i1]
|
||||
return s == g.rules[int(n)]
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (*MphMatcherGroup) Match(_ string) []uint32 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *MphMatcherGroup) MatchAny(pattern string) bool {
|
||||
hash := uint32(0)
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(pattern[i])
|
||||
if pattern[i] == '.' {
|
||||
if g.Lookup(hash, pattern[i:]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return g.Lookup(hash, pattern)
|
||||
}
|
||||
|
||||
func nextPow2(v int) int {
|
||||
if v <= 1 {
|
||||
return 1
|
||||
@ -157,45 +149,6 @@ func nextPow2(v int) int {
|
||||
return int(n)
|
||||
}
|
||||
|
||||
// Lookup searches for s in t and returns its index and whether it was found.
|
||||
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
|
||||
i0 := int(h) & g.level0Mask
|
||||
seed := g.level0[i0]
|
||||
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask
|
||||
n := g.level1[i1]
|
||||
return s == g.rules[int(n)]
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *MphMatcherGroup) Match(pattern string) []uint32 {
|
||||
result := []uint32{}
|
||||
hash := uint32(0)
|
||||
for i := len(pattern) - 1; i >= 0; i-- {
|
||||
hash = hash*PrimeRK + uint32(pattern[i])
|
||||
if pattern[i] == '.' {
|
||||
if g.Lookup(hash, pattern[i:]) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
}
|
||||
}
|
||||
if g.Lookup(hash, pattern) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
if g.ac != nil && g.ac.Match(pattern) {
|
||||
result = append(result, 1)
|
||||
return result
|
||||
}
|
||||
for _, e := range g.otherMatchers {
|
||||
if e.m.Match(pattern) {
|
||||
result = append(result, e.id)
|
||||
return result
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type indexBucket struct {
|
||||
n int
|
||||
vals []int
|
||||
@ -286,7 +239,7 @@ tail:
|
||||
}
|
||||
|
||||
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(p) + x)
|
||||
return unsafe.Pointer(uintptr(p) + x) // nosemgrep
|
||||
}
|
||||
|
||||
func readUnaligned32(p unsafe.Pointer) uint32 {
|
174
common/strmatcher/matchergroup_mph_test.go
Normal file
174
common/strmatcher/matchergroup_mph_test.go
Normal file
@ -0,0 +1,174 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v4/common"
|
||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||
)
|
||||
|
||||
func TestMphMatcherGroup(t *testing.T) {
|
||||
cases1 := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
input string
|
||||
output bool
|
||||
}{
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v3fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases1 {
|
||||
mph := NewMphMatcherGroup()
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||
mph.Build()
|
||||
if m := mph.MatchAny(test.input); m != test.output {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
{
|
||||
cases2Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "163.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "3.com",
|
||||
mType: Full,
|
||||
},
|
||||
}
|
||||
mph := NewMphMatcherGroup()
|
||||
for _, test := range cases2Input {
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||
}
|
||||
mph.Build()
|
||||
cases2Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "126.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "mm163.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "63.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "oogle.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "vvgoogle.com",
|
||||
res: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases2Output {
|
||||
if m := mph.MatchAny(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
cases3Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "video.google.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "gle.com",
|
||||
mType: Domain,
|
||||
},
|
||||
}
|
||||
mph := NewMphMatcherGroup()
|
||||
for _, test := range cases3Input {
|
||||
matcher, err := test.mType.New(test.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||
}
|
||||
mph.Build()
|
||||
cases3Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "google.com",
|
||||
res: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases3Output {
|
||||
if m := mph.MatchAny(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
36
common/strmatcher/matchergroup_simple.go
Normal file
36
common/strmatcher/matchergroup_simple.go
Normal file
@ -0,0 +1,36 @@
|
||||
package strmatcher
|
||||
|
||||
type matcherEntry struct {
|
||||
matcher Matcher
|
||||
value uint32
|
||||
}
|
||||
|
||||
// SimpleMatcherGroup is an implementation of MatcherGroup.
|
||||
// It simply stores all matchers in an array and sequentially matches them.
|
||||
type SimpleMatcherGroup struct {
|
||||
matchers []matcherEntry
|
||||
}
|
||||
|
||||
// AddMatcher implements MatcherGroupForAll.AddMatcher.
|
||||
func (g *SimpleMatcherGroup) AddMatcher(matcher Matcher, value uint32) {
|
||||
g.matchers = append(g.matchers, matcherEntry{
|
||||
matcher: matcher,
|
||||
value: value,
|
||||
})
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *SimpleMatcherGroup) Match(input string) []uint32 {
|
||||
result := []uint32{}
|
||||
for _, e := range g.matchers {
|
||||
if e.matcher.Match(input) {
|
||||
result = append(result, e.value)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
|
||||
return len(g.Match(input)) > 0
|
||||
}
|
69
common/strmatcher/matchergroup_simple_test.go
Normal file
69
common/strmatcher/matchergroup_simple_test.go
Normal file
@ -0,0 +1,69 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v4/common"
|
||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||
)
|
||||
|
||||
func TestSimpleMatcherGroup(t *testing.T) {
|
||||
patterns := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Regex,
|
||||
},
|
||||
}
|
||||
cases := []struct {
|
||||
input string
|
||||
output []uint32
|
||||
}{
|
||||
{
|
||||
input: "www.v2fly.org",
|
||||
output: []uint32{0, 2},
|
||||
},
|
||||
{
|
||||
input: "v2fly.org",
|
||||
output: []uint32{0, 1, 2},
|
||||
},
|
||||
{
|
||||
input: "www.v3fly.org",
|
||||
output: []uint32{},
|
||||
},
|
||||
{
|
||||
input: "2fly.org",
|
||||
output: []uint32{},
|
||||
},
|
||||
{
|
||||
input: "xv2fly.org",
|
||||
output: []uint32{2},
|
||||
},
|
||||
{
|
||||
input: "v2flyxorg",
|
||||
output: []uint32{2},
|
||||
},
|
||||
}
|
||||
matcherGroup := &SimpleMatcherGroup{}
|
||||
for id, entry := range patterns {
|
||||
matcher, err := entry.mType.New(entry.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
|
||||
}
|
||||
for _, test := range cases {
|
||||
if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
|
||||
t.Error("unexpected output: ", r, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
47
common/strmatcher/matchergroup_substr.go
Normal file
47
common/strmatcher/matchergroup_substr.go
Normal file
@ -0,0 +1,47 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// SubstrMatcherGroup is an implementation of MatcherGroup.
// It is implemented in the simplest way that complies with the priority
// specification of Substr matchers.
type SubstrMatcherGroup struct {
	patterns []string // substring patterns, in insertion order
	values   []uint32 // values[i] is the value registered for patterns[i]
}
|
||||
|
||||
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
|
||||
func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint32) {
|
||||
g.patterns = append(g.patterns, matcher.Pattern())
|
||||
g.values = append(g.values, value)
|
||||
}
|
||||
|
||||
// Match implements MatcherGroup.Match.
|
||||
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
|
||||
result := []uint32{}
|
||||
for i, pattern := range g.patterns {
|
||||
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
|
||||
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
|
||||
}
|
||||
}
|
||||
// Sort the match results in dictionary order, so that:
|
||||
// 1. Pattern matched at smaller position (meaning matched further) takes precedence.
|
||||
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
|
||||
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
||||
for i, entry := range result {
|
||||
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// MatchAny implements MatcherGroup.MatchAny.
|
||||
func (g *SubstrMatcherGroup) MatchAny(input string) bool {
|
||||
for _, pattern := range g.patterns {
|
||||
if strings.Contains(input, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
65
common/strmatcher/matchergroup_substr_test.go
Normal file
65
common/strmatcher/matchergroup_substr_test.go
Normal file
@ -0,0 +1,65 @@
|
||||
package strmatcher_test
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/v2fly/v2ray-core/v4/common"
|
||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||
)
|
||||
|
||||
func TestSubstrMatcherGroup(t *testing.T) {
|
||||
patterns := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "apis",
|
||||
mType: Substr,
|
||||
},
|
||||
{
|
||||
pattern: "google",
|
||||
mType: Substr,
|
||||
},
|
||||
{
|
||||
pattern: "apis",
|
||||
mType: Substr,
|
||||
},
|
||||
}
|
||||
cases := []struct {
|
||||
input string
|
||||
output []uint32
|
||||
}{
|
||||
{
|
||||
input: "google.com",
|
||||
output: []uint32{1},
|
||||
},
|
||||
{
|
||||
input: "apis.com",
|
||||
output: []uint32{0, 2},
|
||||
},
|
||||
{
|
||||
input: "googleapis.com",
|
||||
output: []uint32{1, 0, 2},
|
||||
},
|
||||
{
|
||||
input: "fonts.googleapis.com",
|
||||
output: []uint32{1, 0, 2},
|
||||
},
|
||||
{
|
||||
input: "apis.googleapis.com",
|
||||
output: []uint32{0, 2, 1, 0, 2},
|
||||
},
|
||||
}
|
||||
matcherGroup := &SubstrMatcherGroup{}
|
||||
for id, entry := range patterns {
|
||||
matcher, err := entry.mType.New(entry.pattern)
|
||||
common.Must(err)
|
||||
common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
|
||||
}
|
||||
for _, test := range cases {
|
||||
if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
|
||||
t.Error("unexpected output: ", r, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,52 +1,167 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type fullMatcher string
|
||||
// FullMatcher is an implementation of Matcher.
|
||||
type FullMatcher string
|
||||
|
||||
func (m fullMatcher) Match(s string) bool {
|
||||
func (FullMatcher) Type() Type {
|
||||
return Full
|
||||
}
|
||||
|
||||
func (m FullMatcher) Pattern() string {
|
||||
return string(m)
|
||||
}
|
||||
|
||||
func (m FullMatcher) String() string {
|
||||
return "full:" + m.Pattern()
|
||||
}
|
||||
|
||||
func (m FullMatcher) Match(s string) bool {
|
||||
return string(m) == s
|
||||
}
|
||||
|
||||
func (m fullMatcher) String() string {
|
||||
return "full:" + string(m)
|
||||
// DomainMatcher is an implementation of Matcher.
|
||||
type DomainMatcher string
|
||||
|
||||
func (DomainMatcher) Type() Type {
|
||||
return Domain
|
||||
}
|
||||
|
||||
type substrMatcher string
|
||||
|
||||
func (m substrMatcher) Match(s string) bool {
|
||||
return strings.Contains(s, string(m))
|
||||
func (m DomainMatcher) Pattern() string {
|
||||
return string(m)
|
||||
}
|
||||
|
||||
func (m substrMatcher) String() string {
|
||||
return "keyword:" + string(m)
|
||||
func (m DomainMatcher) String() string {
|
||||
return "domain:" + m.Pattern()
|
||||
}
|
||||
|
||||
type domainMatcher string
|
||||
|
||||
func (m domainMatcher) Match(s string) bool {
|
||||
pattern := string(m)
|
||||
func (m DomainMatcher) Match(s string) bool {
|
||||
pattern := m.Pattern()
|
||||
if !strings.HasSuffix(s, pattern) {
|
||||
return false
|
||||
}
|
||||
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
|
||||
}
|
||||
|
||||
func (m domainMatcher) String() string {
|
||||
return "domain:" + string(m)
|
||||
// SubstrMatcher is an implementation of Matcher.
|
||||
type SubstrMatcher string
|
||||
|
||||
func (SubstrMatcher) Type() Type {
|
||||
return Substr
|
||||
}
|
||||
|
||||
type regexMatcher struct {
|
||||
func (m SubstrMatcher) Pattern() string {
|
||||
return string(m)
|
||||
}
|
||||
|
||||
func (m SubstrMatcher) String() string {
|
||||
return "keyword:" + m.Pattern()
|
||||
}
|
||||
|
||||
func (m SubstrMatcher) Match(s string) bool {
|
||||
return strings.Contains(s, m.Pattern())
|
||||
}
|
||||
|
||||
// RegexMatcher is an implementation of Matcher.
|
||||
type RegexMatcher struct {
|
||||
pattern *regexp.Regexp
|
||||
}
|
||||
|
||||
func (m *regexMatcher) Match(s string) bool {
|
||||
func (*RegexMatcher) Type() Type {
|
||||
return Regex
|
||||
}
|
||||
|
||||
func (m *RegexMatcher) Pattern() string {
|
||||
return m.pattern.String()
|
||||
}
|
||||
|
||||
func (m *RegexMatcher) String() string {
|
||||
return "regexp:" + m.Pattern()
|
||||
}
|
||||
|
||||
func (m *RegexMatcher) Match(s string) bool {
|
||||
return m.pattern.MatchString(s)
|
||||
}
|
||||
|
||||
func (m *regexMatcher) String() string {
|
||||
return "regexp:" + m.pattern.String()
|
||||
// New creates a new Matcher based on the given pattern.
|
||||
func (t Type) New(pattern string) (Matcher, error) {
|
||||
switch t {
|
||||
case Full:
|
||||
return FullMatcher(pattern), nil
|
||||
case Substr:
|
||||
return SubstrMatcher(pattern), nil
|
||||
case Domain:
|
||||
return DomainMatcher(pattern), nil
|
||||
case Regex: // 1. regex matching is case-sensitive
|
||||
regex, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &RegexMatcher{pattern: regex}, nil
|
||||
default:
|
||||
panic("Unknown type")
|
||||
}
|
||||
}
|
||||
|
||||
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
||||
type MatcherGroupForAll interface {
|
||||
AddMatcher(matcher Matcher, value uint32)
|
||||
}
|
||||
|
||||
// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
|
||||
type MatcherGroupForFull interface {
|
||||
AddFullMatcher(matcher FullMatcher, value uint32)
|
||||
}
|
||||
|
||||
// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
|
||||
type MatcherGroupForDomain interface {
|
||||
AddDomainMatcher(matcher DomainMatcher, value uint32)
|
||||
}
|
||||
|
||||
// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
|
||||
type MatcherGroupForSubstr interface {
|
||||
AddSubstrMatcher(matcher SubstrMatcher, value uint32)
|
||||
}
|
||||
|
||||
// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
|
||||
type MatcherGroupForRegex interface {
|
||||
AddRegexMatcher(matcher *RegexMatcher, value uint32)
|
||||
}
|
||||
|
||||
// AddMatcherGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
|
||||
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
||||
// This function is provided to help writing code to test a MatcherGroup.
|
||||
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
||||
if g, ok := g.(MatcherGroupForAll); ok {
|
||||
g.AddMatcher(matcher, value)
|
||||
return nil
|
||||
}
|
||||
switch matcher := matcher.(type) {
|
||||
case FullMatcher:
|
||||
if g, ok := g.(MatcherGroupForFull); ok {
|
||||
g.AddFullMatcher(matcher, value)
|
||||
return nil
|
||||
}
|
||||
case DomainMatcher:
|
||||
if g, ok := g.(MatcherGroupForDomain); ok {
|
||||
g.AddDomainMatcher(matcher, value)
|
||||
return nil
|
||||
}
|
||||
case SubstrMatcher:
|
||||
if g, ok := g.(MatcherGroupForSubstr); ok {
|
||||
g.AddSubstrMatcher(matcher, value)
|
||||
return nil
|
||||
}
|
||||
case *RegexMatcher:
|
||||
if g, ok := g.(MatcherGroupForRegex); ok {
|
||||
g.AddRegexMatcher(matcher, value)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return errors.New("cannot add matcher to matcher group")
|
||||
}
|
||||
|
@ -71,172 +71,3 @@ func TestMatcher(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestACAutomaton(t *testing.T) {
|
||||
cases1 := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
input string
|
||||
output bool
|
||||
}{
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "www.v3fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Domain,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "v2fly.org",
|
||||
output: true,
|
||||
},
|
||||
{
|
||||
pattern: "v2fly.org",
|
||||
mType: Full,
|
||||
input: "xv2fly.org",
|
||||
output: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases1 {
|
||||
ac := NewACAutomaton()
|
||||
ac.Add(test.pattern, test.mType)
|
||||
ac.Build()
|
||||
if m := ac.Match(test.input); m != test.output {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
{
|
||||
cases2Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "163.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "3.com",
|
||||
mType: Full,
|
||||
},
|
||||
{
|
||||
pattern: "google.com",
|
||||
mType: Substr,
|
||||
},
|
||||
{
|
||||
pattern: "vgoogle.com",
|
||||
mType: Substr,
|
||||
},
|
||||
}
|
||||
ac := NewACAutomaton()
|
||||
for _, test := range cases2Input {
|
||||
ac.Add(test.pattern, test.mType)
|
||||
}
|
||||
ac.Build()
|
||||
cases2Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "126.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "mm163.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "m.126.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "163.com",
|
||||
res: true,
|
||||
},
|
||||
{
|
||||
pattern: "63.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "oogle.com",
|
||||
res: false,
|
||||
},
|
||||
{
|
||||
pattern: "vvgoogle.com",
|
||||
res: true,
|
||||
},
|
||||
}
|
||||
for _, test := range cases2Output {
|
||||
if m := ac.Match(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
cases3Input := []struct {
|
||||
pattern string
|
||||
mType Type
|
||||
}{
|
||||
{
|
||||
pattern: "video.google.com",
|
||||
mType: Domain,
|
||||
},
|
||||
{
|
||||
pattern: "gle.com",
|
||||
mType: Domain,
|
||||
},
|
||||
}
|
||||
ac := NewACAutomaton()
|
||||
for _, test := range cases3Input {
|
||||
ac.Add(test.pattern, test.mType)
|
||||
}
|
||||
ac.Build()
|
||||
cases3Output := []struct {
|
||||
pattern string
|
||||
res bool
|
||||
}{
|
||||
{
|
||||
pattern: "google.com",
|
||||
res: false,
|
||||
},
|
||||
}
|
||||
for _, test := range cases3Output {
|
||||
if m := ac.Match(test.pattern); m != test.res {
|
||||
t.Error("unexpected output: ", m, " for test case ", test)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,107 +1,74 @@
|
||||
package strmatcher
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// Matcher is the interface to determine a string matches a pattern.
|
||||
type Matcher interface {
|
||||
// Match returns true if the given string matches a predefined pattern.
|
||||
Match(string) bool
|
||||
String() string
|
||||
}
|
||||
|
||||
// Type is the type of the matcher.
|
||||
type Type byte
|
||||
|
||||
const (
|
||||
// Full is the type of matcher that the input string must exactly equal to the pattern.
|
||||
Full Type = iota
|
||||
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
|
||||
Substr
|
||||
Full Type = 0
|
||||
// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
|
||||
Domain
|
||||
Domain Type = 1
|
||||
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
|
||||
Substr Type = 2
|
||||
// Regex is the type of matcher that the input string must match the regular-expression pattern.
|
||||
Regex
|
||||
Regex Type = 3
|
||||
)
|
||||
|
||||
// New creates a new Matcher based on the given pattern.
|
||||
func (t Type) New(pattern string) (Matcher, error) {
|
||||
// 1. regex matching is case-sensitive
|
||||
switch t {
|
||||
case Full:
|
||||
return fullMatcher(pattern), nil
|
||||
case Substr:
|
||||
return substrMatcher(pattern), nil
|
||||
case Domain:
|
||||
return domainMatcher(pattern), nil
|
||||
case Regex:
|
||||
r, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ®exMatcher{
|
||||
pattern: r,
|
||||
}, nil
|
||||
default:
|
||||
panic("Unknown type")
|
||||
}
|
||||
// Matcher is the interface to determine whether a string matches a pattern.
|
||||
// * This is a basic matcher to represent a certain kind of match semantic(full, substr, domain or regex).
|
||||
type Matcher interface {
|
||||
// Type returns the matcher's type.
|
||||
Type() Type
|
||||
|
||||
// Pattern returns the matcher's raw string representation.
|
||||
Pattern() string
|
||||
|
||||
// String returns a string representation of the matcher containing its type and pattern.
|
||||
String() string
|
||||
|
||||
// Match returns true if the given string matches a predefined pattern.
|
||||
// * This method is seldom used for performance reason
|
||||
// and is generally taken over by their corresponding MatcherGroup.
|
||||
Match(input string) bool
|
||||
}
|
||||
|
||||
// IndexMatcher is the interface for matching with a group of matchers.
|
||||
type IndexMatcher interface {
|
||||
// Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists.
|
||||
// MatcherGroup is an advanced type of matcher to accept a bunch of basic Matchers (of certain type, not all matcher types).
|
||||
// For example:
|
||||
// * FullMatcherGroup accepts FullMatcher and uses a hash table to facilitate lookup.
|
||||
// * DomainMatcherGroup accepts DomainMatcher and uses a trie to optimize both memory consumption and lookup speed.
|
||||
type MatcherGroup interface {
|
||||
// Match returns all matched matchers with their corresponding values.
|
||||
Match(input string) []uint32
|
||||
|
||||
// MatchAny returns true as soon as one matching matcher is found.
|
||||
MatchAny(input string) bool
|
||||
}
|
||||
|
||||
type matcherEntry struct {
|
||||
m Matcher
|
||||
id uint32
|
||||
}
|
||||
|
||||
// MatcherGroup is an implementation of IndexMatcher.
|
||||
// Empty initialization works.
|
||||
type MatcherGroup struct {
|
||||
count uint32
|
||||
fullMatcher FullMatcherGroup
|
||||
domainMatcher DomainMatcherGroup
|
||||
otherMatchers []matcherEntry
|
||||
}
|
||||
|
||||
// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
|
||||
func (g *MatcherGroup) Add(m Matcher) uint32 {
|
||||
g.count++
|
||||
c := g.count
|
||||
|
||||
switch tm := m.(type) {
|
||||
case fullMatcher:
|
||||
g.fullMatcher.addMatcher(tm, c)
|
||||
case domainMatcher:
|
||||
g.domainMatcher.addMatcher(tm, c)
|
||||
default:
|
||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
||||
m: m,
|
||||
id: c,
|
||||
})
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Match implements IndexMatcher.Match.
|
||||
func (g *MatcherGroup) Match(pattern string) []uint32 {
|
||||
result := []uint32{}
|
||||
result = append(result, g.fullMatcher.Match(pattern)...)
|
||||
result = append(result, g.domainMatcher.Match(pattern)...)
|
||||
for _, e := range g.otherMatchers {
|
||||
if e.m.Match(pattern) {
|
||||
result = append(result, e.id)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Size returns the number of matchers in the MatcherGroup.
|
||||
func (g *MatcherGroup) Size() uint32 {
|
||||
return g.count
|
||||
// IndexMatcher is a general type of matcher that accepts all kinds of basic matchers.
|
||||
// It should:
|
||||
// * Accept all Matcher types with no exception.
|
||||
// * Optimize string matching with a combination of MatcherGroups.
|
||||
// * Obey certain priority order specification when returning matched Matchers.
|
||||
type IndexMatcher interface {
|
||||
// Size returns number of matchers added to IndexMatcher.
|
||||
Size() uint32
|
||||
|
||||
// Add adds a new Matcher to IndexMatcher, and returns its index. The index will never be 0.
|
||||
Add(matcher Matcher) uint32
|
||||
|
||||
// Build builds the IndexMatcher to be ready for matching.
|
||||
Build() error
|
||||
|
||||
// Match returns the indices of all matchers that matches the input.
|
||||
// * Empty array is returned if no such matcher exists.
|
||||
// * The order of returned matchers should follow priority specification.
|
||||
// Priority specification:
|
||||
// 1. Priority between matcher types: full > domain > substr > regex.
|
||||
// 2. Priority of same-priority matchers matching at same position: the early added takes precedence.
|
||||
// 3. Priority of domain matchers matching at different levels: the further matched domain takes precedence.
|
||||
// 4. Priority of substr matchers matching at different positions: the further matched substr takes precedence.
|
||||
Match(input string) []uint32
|
||||
|
||||
// MatchAny returns true as soon as one matching matcher is found.
|
||||
MatchAny(input string) bool
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user