mirror of
https://github.com/v2fly/v2ray-core.git
synced 2025-01-04 16:37:12 -05:00
Refactor: strmatcher module (#1333)
* Reorganize strmatcher's package structure * Rename types in strmatcher package according to their file names * Stabilize strmatcher's Matcher interface * Implement []matcherEntry as SimpleMatcherGroup * Implement mph algorithm extracted from MphIndexMatcher as MphMatcherGroup * Implement AddMatcher/AddFullMatcher/AddDomainMatcher/AddSubstrMatcher for each MatcherGroup * Stabilize strmatcher's MatcherGroup interface * Stabilize strmatcher's IndexMatcher interface * Update strmatcher's benchmark * Compatibility fix for app/router's DomainMatcher condition * Fix code quality issue * Fix basic matcher issues * Update priority specification for Substr matcher
This commit is contained in:
parent
a66bb28aee
commit
d4da365c5f
@ -98,7 +98,7 @@ func New(ctx context.Context, config *Config) (*DNS, error) {
|
|||||||
|
|
||||||
// MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1
|
// MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1
|
||||||
matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1)
|
matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1)
|
||||||
domainMatcher := &strmatcher.MatcherGroup{}
|
domainMatcher := &strmatcher.LinearIndexMatcher{}
|
||||||
geoipContainer := router.GeoIPMatcherContainer{}
|
geoipContainer := router.GeoIPMatcherContainer{}
|
||||||
|
|
||||||
for _, endpoint := range config.NameServers {
|
for _, endpoint := range config.NameServers {
|
||||||
|
@ -11,12 +11,12 @@ import (
|
|||||||
// StaticHosts represents static domain-ip mapping in DNS server.
|
// StaticHosts represents static domain-ip mapping in DNS server.
|
||||||
type StaticHosts struct {
|
type StaticHosts struct {
|
||||||
ips [][]net.Address
|
ips [][]net.Address
|
||||||
matchers *strmatcher.MatcherGroup
|
matchers *strmatcher.LinearIndexMatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewStaticHosts creates a new StaticHosts instance.
|
// NewStaticHosts creates a new StaticHosts instance.
|
||||||
func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) {
|
func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) {
|
||||||
g := new(strmatcher.MatcherGroup)
|
g := new(strmatcher.LinearIndexMatcher)
|
||||||
sh := &StaticHosts{
|
sh := &StaticHosts{
|
||||||
ips: make([][]net.Address, len(hosts)+len(legacy)+16),
|
ips: make([][]net.Address, len(hosts)+len(legacy)+16),
|
||||||
matchers: g,
|
matchers: g,
|
||||||
|
@ -64,44 +64,34 @@ func domainToMatcher(domain *routercommon.Domain) (strmatcher.Matcher, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type DomainMatcher struct {
|
type DomainMatcher struct {
|
||||||
matchers strmatcher.IndexMatcher
|
matcher strmatcher.IndexMatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewMphMatcherGroup(domains []*routercommon.Domain) (*DomainMatcher, error) {
|
func NewDomainMatcher(matcherType string, domains []*routercommon.Domain) (*DomainMatcher, error) {
|
||||||
g := strmatcher.NewMphMatcherGroup()
|
var indexMatcher strmatcher.IndexMatcher
|
||||||
for _, d := range domains {
|
switch matcherType {
|
||||||
matcherType, f := matcherTypeMap[d.Type]
|
case "mph", "hybrid":
|
||||||
if !f {
|
indexMatcher = strmatcher.NewMphIndexMatcher()
|
||||||
return nil, newError("unsupported domain type", d.Type)
|
case "linear":
|
||||||
|
indexMatcher = strmatcher.NewLinearIndexMatcher()
|
||||||
|
default:
|
||||||
|
indexMatcher = strmatcher.NewLinearIndexMatcher()
|
||||||
}
|
}
|
||||||
_, err := g.AddPattern(d.Value, matcherType)
|
for _, domain := range domains {
|
||||||
|
matcher, err := domainToMatcher(domain)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
indexMatcher.Add(matcher)
|
||||||
}
|
}
|
||||||
g.Build()
|
if err := indexMatcher.Build(); err != nil {
|
||||||
return &DomainMatcher{
|
|
||||||
matchers: g,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewDomainMatcher(domains []*routercommon.Domain) (*DomainMatcher, error) {
|
|
||||||
g := new(strmatcher.MatcherGroup)
|
|
||||||
for _, d := range domains {
|
|
||||||
m, err := domainToMatcher(d)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
g.Add(m)
|
return &DomainMatcher{matcher: indexMatcher}, nil
|
||||||
}
|
|
||||||
|
|
||||||
return &DomainMatcher{
|
|
||||||
matchers: g,
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *DomainMatcher) ApplyDomain(domain string) bool {
|
func (m *DomainMatcher) Match(domain string) bool {
|
||||||
return len(m.matchers.Match(strings.ToLower(domain))) > 0
|
return m.matcher.MatchAny(domain)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply implements Condition.
|
// Apply implements Condition.
|
||||||
@ -110,7 +100,7 @@ func (m *DomainMatcher) Apply(ctx routing.Context) bool {
|
|||||||
if len(domain) == 0 {
|
if len(domain) == 0 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return m.ApplyDomain(domain)
|
return m.Match(domain)
|
||||||
}
|
}
|
||||||
|
|
||||||
type MultiGeoIPMatcher struct {
|
type MultiGeoIPMatcher struct {
|
||||||
|
@ -375,9 +375,9 @@ func TestChinaSites(t *testing.T) {
|
|||||||
domains, err := loadGeoSite("CN")
|
domains, err := loadGeoSite("CN")
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
matcher, err := router.NewDomainMatcher(domains)
|
matcher, err := router.NewDomainMatcher("linear", domains)
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
acMatcher, err := router.NewMphMatcherGroup(domains)
|
mphMatcher, err := router.NewDomainMatcher("mph", domains)
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
type TestCase struct {
|
type TestCase struct {
|
||||||
@ -408,8 +408,8 @@ func TestChinaSites(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
r1 := matcher.ApplyDomain(testCase.Domain)
|
r1 := matcher.Match(testCase.Domain)
|
||||||
r2 := acMatcher.ApplyDomain(testCase.Domain)
|
r2 := mphMatcher.Match(testCase.Domain)
|
||||||
if r1 != testCase.Output {
|
if r1 != testCase.Output {
|
||||||
t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1)
|
t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1)
|
||||||
} else if r2 != testCase.Output {
|
} else if r2 != testCase.Output {
|
||||||
@ -422,7 +422,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
|
|||||||
domains, err := loadGeoSite("CN")
|
domains, err := loadGeoSite("CN")
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
matcher, err := router.NewMphMatcherGroup(domains)
|
matcher, err := router.NewDomainMatcher("mph", domains)
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
type TestCase struct {
|
type TestCase struct {
|
||||||
@ -455,7 +455,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
|
|||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
_ = matcher.ApplyDomain(testCase.Domain)
|
_ = matcher.Match(testCase.Domain)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -464,7 +464,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
|
|||||||
domains, err := loadGeoSite("CN")
|
domains, err := loadGeoSite("CN")
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
matcher, err := router.NewDomainMatcher(domains)
|
matcher, err := router.NewDomainMatcher("linear", domains)
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
|
||||||
type TestCase struct {
|
type TestCase struct {
|
||||||
@ -497,7 +497,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
|
|||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
_ = matcher.ApplyDomain(testCase.Domain)
|
_ = matcher.Match(testCase.Domain)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -39,23 +39,11 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) {
|
|||||||
conds := NewConditionChan()
|
conds := NewConditionChan()
|
||||||
|
|
||||||
if len(rr.Domain) > 0 {
|
if len(rr.Domain) > 0 {
|
||||||
switch rr.DomainMatcher {
|
cond, err := NewDomainMatcher(rr.DomainMatcher, rr.Domain)
|
||||||
case "mph", "hybrid":
|
|
||||||
matcher, err := NewMphMatcherGroup(rr.Domain)
|
|
||||||
if err != nil {
|
|
||||||
return nil, newError("failed to build domain condition with MphDomainMatcher").Base(err)
|
|
||||||
}
|
|
||||||
newError("MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)").AtDebug().WriteToLog()
|
|
||||||
conds.Add(matcher)
|
|
||||||
case "linear":
|
|
||||||
fallthrough
|
|
||||||
default:
|
|
||||||
matcher, err := NewDomainMatcher(rr.Domain)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, newError("failed to build domain condition").Base(err)
|
return nil, newError("failed to build domain condition").Base(err)
|
||||||
}
|
}
|
||||||
conds.Add(matcher)
|
conds.Add(cond)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(rr.UserEmail) > 0 {
|
if len(rr.UserEmail) > 0 {
|
||||||
|
@ -49,7 +49,7 @@ func (s *statsServer) GetStats(ctx context.Context, request *GetStatsRequest) (*
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) {
|
func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) {
|
||||||
mgroup := &strmatcher.MatcherGroup{}
|
mgroup := &strmatcher.LinearIndexMatcher{}
|
||||||
if request.Pattern != "" {
|
if request.Pattern != "" {
|
||||||
request.Patterns = append(request.Patterns, request.Pattern)
|
request.Patterns = append(request.Patterns, request.Pattern)
|
||||||
}
|
}
|
||||||
|
@ -8,16 +8,18 @@ import (
|
|||||||
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||||
)
|
)
|
||||||
|
|
||||||
func BenchmarkACAutomaton(b *testing.B) {
|
// Benchmark Domain Matcher Groups
|
||||||
ac := NewACAutomaton()
|
|
||||||
|
func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
|
||||||
|
g := new(SimpleMatcherGroup)
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
for i := 1; i <= 1024; i++ {
|
||||||
ac.Add(strconv.Itoa(i)+".v2fly.org", Domain)
|
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
}
|
}
|
||||||
ac.Build()
|
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
_ = ac.Match("0.v2fly.org")
|
_ = g.Match("0.v2fly.org")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,7 +27,48 @@ func BenchmarkDomainMatcherGroup(b *testing.B) {
|
|||||||
g := new(DomainMatcherGroup)
|
g := new(DomainMatcherGroup)
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
for i := 1; i <= 1024; i++ {
|
||||||
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
|
AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = g.Match("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
|
||||||
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
ac.Build()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = ac.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
|
||||||
|
mph := NewMphMatcherGroup()
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
mph.Build()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = mph.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Benchmark Full Matcher Groups
|
||||||
|
|
||||||
|
func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
|
||||||
|
g := new(SimpleMatcherGroup)
|
||||||
|
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
}
|
}
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
@ -38,7 +81,7 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
|
|||||||
g := new(FullMatcherGroup)
|
g := new(FullMatcherGroup)
|
||||||
|
|
||||||
for i := 1; i <= 1024; i++ {
|
for i := 1; i <= 1024; i++ {
|
||||||
g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
|
AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
}
|
}
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
@ -47,8 +90,64 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkMarchGroup(b *testing.B) {
|
func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
|
||||||
g := new(MatcherGroup)
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
ac.Build()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = ac.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMphMatcherGroupFull(b *testing.B) {
|
||||||
|
mph := NewMphMatcherGroup()
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
mph.Build()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = mph.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Benchmark Substr Matcher Groups
|
||||||
|
|
||||||
|
func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
|
||||||
|
g := new(SimpleMatcherGroup)
|
||||||
|
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = g.Match("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
|
||||||
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
for i := 1; i <= 1024; i++ {
|
||||||
|
AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
|
||||||
|
}
|
||||||
|
ac.Build()
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = ac.MatchAny("0.v2fly.org")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Benchmark Index Matchers
|
||||||
|
|
||||||
|
func BenchmarkLinearIndexMatcher(b *testing.B) {
|
||||||
|
g := new(LinearIndexMatcher)
|
||||||
for i := 1; i <= 1024; i++ {
|
for i := 1; i <= 1024; i++ {
|
||||||
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
|
m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
package strmatcher
|
|
||||||
|
|
||||||
type FullMatcherGroup struct {
|
|
||||||
matchers map[string][]uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g *FullMatcherGroup) Add(domain string, value uint32) {
|
|
||||||
if g.matchers == nil {
|
|
||||||
g.matchers = make(map[string][]uint32)
|
|
||||||
}
|
|
||||||
|
|
||||||
g.matchers[domain] = append(g.matchers[domain], value)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) {
|
|
||||||
g.Add(string(m), value)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g *FullMatcherGroup) Match(str string) []uint32 {
|
|
||||||
if g.matchers == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return g.matchers[str]
|
|
||||||
}
|
|
59
common/strmatcher/indexmatcher_linear.go
Normal file
59
common/strmatcher/indexmatcher_linear.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
package strmatcher
|
||||||
|
|
||||||
|
// LinearIndexMatcher is an implementation of IndexMatcher.
|
||||||
|
// Empty initialization works.
|
||||||
|
type LinearIndexMatcher struct {
|
||||||
|
count uint32
|
||||||
|
fullMatcher FullMatcherGroup
|
||||||
|
domainMatcher DomainMatcherGroup
|
||||||
|
substrMatcher SubstrMatcherGroup
|
||||||
|
otherMatchers SimpleMatcherGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewLinearIndexMatcher() *LinearIndexMatcher {
|
||||||
|
return new(LinearIndexMatcher)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add implements IndexMatcher.Add.
|
||||||
|
func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
|
||||||
|
g.count++
|
||||||
|
index := g.count
|
||||||
|
|
||||||
|
switch matcher := matcher.(type) {
|
||||||
|
case FullMatcher:
|
||||||
|
g.fullMatcher.AddFullMatcher(matcher, index)
|
||||||
|
case DomainMatcher:
|
||||||
|
g.domainMatcher.AddDomainMatcher(matcher, index)
|
||||||
|
case SubstrMatcher:
|
||||||
|
g.substrMatcher.AddSubstrMatcher(matcher, index)
|
||||||
|
default:
|
||||||
|
g.otherMatchers.AddMatcher(matcher, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build implements IndexMatcher.Build.
|
||||||
|
func (*LinearIndexMatcher) Build() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements IndexMatcher.Match.
|
||||||
|
func (g *LinearIndexMatcher) Match(input string) []uint32 {
|
||||||
|
result := []uint32{}
|
||||||
|
result = append(result, g.fullMatcher.Match(input)...)
|
||||||
|
result = append(result, g.domainMatcher.Match(input)...)
|
||||||
|
result = append(result, g.substrMatcher.Match(input)...)
|
||||||
|
result = append(result, g.otherMatchers.Match(input)...)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements IndexMatcher.MatchAny.
|
||||||
|
func (g *LinearIndexMatcher) MatchAny(input string) bool {
|
||||||
|
return len(g.Match(input)) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size implements IndexMatcher.Size.
|
||||||
|
func (g *LinearIndexMatcher) Size() uint32 {
|
||||||
|
return g.count
|
||||||
|
}
|
@ -9,7 +9,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489
|
// See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489
|
||||||
func TestMatcherGroup(t *testing.T) {
|
func TestLinearIndexMatcher(t *testing.T) {
|
||||||
rules := []struct {
|
rules := []struct {
|
||||||
Type Type
|
Type Type
|
||||||
Domain string
|
Domain string
|
||||||
@ -73,19 +73,20 @@ func TestMatcherGroup(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
Input: "testapis.us",
|
Input: "testapis.us",
|
||||||
Output: []uint32{1, 2, 6},
|
Output: []uint32{2, 6, 1},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Input: "example.com",
|
Input: "example.com",
|
||||||
Output: []uint32{10, 4},
|
Output: []uint32{10, 4},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
matcherGroup := &MatcherGroup{}
|
matcherGroup := NewLinearIndexMatcher()
|
||||||
for _, rule := range rules {
|
for _, rule := range rules {
|
||||||
matcher, err := rule.Type.New(rule.Domain)
|
matcher, err := rule.Type.New(rule.Domain)
|
||||||
common.Must(err)
|
common.Must(err)
|
||||||
matcherGroup.Add(matcher)
|
matcherGroup.Add(matcher)
|
||||||
}
|
}
|
||||||
|
matcherGroup.Build()
|
||||||
for _, test := range cases {
|
for _, test := range cases {
|
||||||
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
|
if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
|
||||||
t.Error("unexpected output: ", m, " for test case ", test)
|
t.Error("unexpected output: ", m, " for test case ", test)
|
80
common/strmatcher/indexmatcher_mph.go
Normal file
80
common/strmatcher/indexmatcher_mph.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
package strmatcher
|
||||||
|
|
||||||
|
// An MphIndexMatcher is divided into three parts:
|
||||||
|
// 1. `full` and `domain` patterns are matched by the Rabin-Karp algorithm and a minimal perfect hash table;
|
||||||
|
// 2. `substr` patterns are matched by an AC automaton;
|
||||||
|
// 3. `regex` patterns are matched with the regex library.
|
||||||
|
type MphIndexMatcher struct {
|
||||||
|
count uint32
|
||||||
|
mph *MphMatcherGroup
|
||||||
|
ac *ACAutomatonMatcherGroup
|
||||||
|
regex SimpleMatcherGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMphIndexMatcher() *MphIndexMatcher {
|
||||||
|
return &MphIndexMatcher{
|
||||||
|
mph: nil,
|
||||||
|
ac: nil,
|
||||||
|
regex: SimpleMatcherGroup{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add implements IndexMatcher.Add.
|
||||||
|
func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
|
||||||
|
g.count++
|
||||||
|
index := g.count
|
||||||
|
|
||||||
|
switch matcher := matcher.(type) {
|
||||||
|
case FullMatcher:
|
||||||
|
if g.mph == nil {
|
||||||
|
g.mph = NewMphMatcherGroup()
|
||||||
|
}
|
||||||
|
g.mph.AddFullMatcher(matcher, index)
|
||||||
|
case DomainMatcher:
|
||||||
|
if g.mph == nil {
|
||||||
|
g.mph = NewMphMatcherGroup()
|
||||||
|
}
|
||||||
|
g.mph.AddDomainMatcher(matcher, index)
|
||||||
|
case SubstrMatcher:
|
||||||
|
if g.ac == nil {
|
||||||
|
g.ac = NewACAutomatonMatcherGroup()
|
||||||
|
}
|
||||||
|
g.ac.AddSubstrMatcher(matcher, index)
|
||||||
|
case *RegexMatcher:
|
||||||
|
g.regex.AddMatcher(matcher, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build implements IndexMatcher.Build.
|
||||||
|
func (g *MphIndexMatcher) Build() error {
|
||||||
|
if g.mph != nil {
|
||||||
|
g.mph.Build()
|
||||||
|
}
|
||||||
|
if g.ac != nil {
|
||||||
|
g.ac.Build()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements IndexMatcher.Match.
|
||||||
|
func (*MphIndexMatcher) Match(string) []uint32 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements IndexMatcher.MatchAny.
|
||||||
|
func (g *MphIndexMatcher) MatchAny(input string) bool {
|
||||||
|
if g.mph != nil && g.mph.MatchAny(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if g.ac != nil && g.ac.MatchAny(input) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return g.regex.MatchAny(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size implements IndexMatcher.Size.
|
||||||
|
func (g *MphIndexMatcher) Size() uint32 {
|
||||||
|
return g.count
|
||||||
|
}
|
@ -21,7 +21,9 @@ type Edge struct {
|
|||||||
nextNode int
|
nextNode int
|
||||||
}
|
}
|
||||||
|
|
||||||
type ACAutomaton struct {
|
// ACAutomatonMatcherGroup is an implementation of MatcherGroup.
|
||||||
|
// It uses an AC automaton to provide support for Full, Domain and Substr matchers. Trie nodes are char based.
|
||||||
|
type ACAutomatonMatcherGroup struct {
|
||||||
trie [][validCharCount]Edge
|
trie [][validCharCount]Edge
|
||||||
fail []int
|
fail []int
|
||||||
exists []MatchType
|
exists []MatchType
|
||||||
@ -121,8 +123,8 @@ var char2Index = []int{
|
|||||||
'9': 52,
|
'9': 52,
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewACAutomaton() *ACAutomaton {
|
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
|
||||||
ac := new(ACAutomaton)
|
ac := new(ACAutomatonMatcherGroup)
|
||||||
ac.trie = append(ac.trie, newNode())
|
ac.trie = append(ac.trie, newNode())
|
||||||
ac.fail = append(ac.fail, 0)
|
ac.fail = append(ac.fail, 0)
|
||||||
ac.exists = append(ac.exists, MatchType{
|
ac.exists = append(ac.exists, MatchType{
|
||||||
@ -132,64 +134,50 @@ func NewACAutomaton() *ACAutomaton {
|
|||||||
return ac
|
return ac
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ac *ACAutomaton) Add(domain string, t Type) {
|
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||||
node := 0
|
func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
|
||||||
for i := len(domain) - 1; i >= 0; i-- {
|
ac.addPattern(0, matcher.Pattern(), matcher.Type())
|
||||||
idx := char2Index[domain[i]]
|
|
||||||
if ac.trie[node][idx].nextNode == 0 {
|
|
||||||
ac.count++
|
|
||||||
if len(ac.trie) < ac.count+1 {
|
|
||||||
ac.trie = append(ac.trie, newNode())
|
|
||||||
ac.fail = append(ac.fail, 0)
|
|
||||||
ac.exists = append(ac.exists, MatchType{
|
|
||||||
matchType: Full,
|
|
||||||
exist: false,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ac.trie[node][idx] = Edge{
|
|
||||||
edgeType: TrieEdge,
|
|
||||||
nextNode: ac.count,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
node = ac.trie[node][idx].nextNode
|
|
||||||
}
|
|
||||||
ac.exists[node] = MatchType{
|
|
||||||
matchType: t,
|
|
||||||
exist: true,
|
|
||||||
}
|
|
||||||
switch t {
|
|
||||||
case Domain:
|
|
||||||
ac.exists[node] = MatchType{
|
|
||||||
matchType: Full,
|
|
||||||
exist: true,
|
|
||||||
}
|
|
||||||
idx := char2Index['.']
|
|
||||||
if ac.trie[node][idx].nextNode == 0 {
|
|
||||||
ac.count++
|
|
||||||
if len(ac.trie) < ac.count+1 {
|
|
||||||
ac.trie = append(ac.trie, newNode())
|
|
||||||
ac.fail = append(ac.fail, 0)
|
|
||||||
ac.exists = append(ac.exists, MatchType{
|
|
||||||
matchType: Full,
|
|
||||||
exist: false,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ac.trie[node][idx] = Edge{
|
|
||||||
edgeType: TrieEdge,
|
|
||||||
nextNode: ac.count,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
node = ac.trie[node][idx].nextNode
|
|
||||||
ac.exists[node] = MatchType{
|
|
||||||
matchType: t,
|
|
||||||
exist: true,
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ac *ACAutomaton) Build() {
|
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||||
|
func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
|
||||||
|
node := ac.addPattern(0, matcher.Pattern(), Full)
|
||||||
|
ac.addPattern(node, ".", Domain)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
|
||||||
|
func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) {
|
||||||
|
ac.addPattern(0, matcher.Pattern(), matcher.Type())
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int {
|
||||||
|
for i := len(pattern) - 1; i >= 0; i-- {
|
||||||
|
idx := char2Index[pattern[i]]
|
||||||
|
if ac.trie[node][idx].nextNode == 0 {
|
||||||
|
ac.count++
|
||||||
|
if len(ac.trie) < ac.count+1 {
|
||||||
|
ac.trie = append(ac.trie, newNode())
|
||||||
|
ac.fail = append(ac.fail, 0)
|
||||||
|
ac.exists = append(ac.exists, MatchType{
|
||||||
|
matchType: Full,
|
||||||
|
exist: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
ac.trie[node][idx] = Edge{
|
||||||
|
edgeType: TrieEdge,
|
||||||
|
nextNode: ac.count,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node = ac.trie[node][idx].nextNode
|
||||||
|
}
|
||||||
|
ac.exists[node] = MatchType{
|
||||||
|
matchType: matcherType,
|
||||||
|
exist: true,
|
||||||
|
}
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ac *ACAutomatonMatcherGroup) Build() {
|
||||||
queue := list.New()
|
queue := list.New()
|
||||||
for i := 0; i < validCharCount; i++ {
|
for i := 0; i < validCharCount; i++ {
|
||||||
if ac.trie[0][i].nextNode != 0 {
|
if ac.trie[0][i].nextNode != 0 {
|
||||||
@ -218,7 +206,13 @@ func (ac *ACAutomaton) Build() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ac *ACAutomaton) Match(s string) bool {
|
// Match implements MatcherGroup.Match.
|
||||||
|
func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool {
|
||||||
node := 0
|
node := 0
|
||||||
fullMatch := true
|
fullMatch := true
|
||||||
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
|
183
common/strmatcher/matchergroup_ac_automation_test.go
Normal file
183
common/strmatcher/matchergroup_ac_automation_test.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v4/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestACAutomatonMatcherGroup(t *testing.T) {
|
||||||
|
cases1 := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
input string
|
||||||
|
output bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "www.v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "www.v3fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "xv2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Full,
|
||||||
|
input: "v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Full,
|
||||||
|
input: "xv2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases1 {
|
||||||
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||||
|
ac.Build()
|
||||||
|
if m := ac.MatchAny(test.input); m != test.output {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
cases2Input := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "163.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.126.com",
|
||||||
|
mType: Full,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "3.com",
|
||||||
|
mType: Full,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "google.com",
|
||||||
|
mType: Substr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "vgoogle.com",
|
||||||
|
mType: Substr,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
for _, test := range cases2Input {
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||||
|
}
|
||||||
|
ac.Build()
|
||||||
|
cases2Output := []struct {
|
||||||
|
pattern string
|
||||||
|
res bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "126.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.163.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "mm163.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.126.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "163.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "63.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "oogle.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "vvgoogle.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases2Output {
|
||||||
|
if m := ac.MatchAny(test.pattern); m != test.res {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
cases3Input := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "video.google.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "gle.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
ac := NewACAutomatonMatcherGroup()
|
||||||
|
for _, test := range cases3Input {
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(ac, matcher, 0))
|
||||||
|
}
|
||||||
|
ac.Build()
|
||||||
|
cases3Output := []struct {
|
||||||
|
pattern string
|
||||||
|
res bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "google.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases3Output {
|
||||||
|
if m := ac.MatchAny(test.pattern); m != test.res {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -11,19 +11,20 @@ type node struct {
|
|||||||
sub map[string]*node
|
sub map[string]*node
|
||||||
}
|
}
|
||||||
|
|
||||||
// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers.
|
// DomainMatcherGroup is an implementation of MatcherGroup.
|
||||||
// Visible for testing only.
|
// It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
|
||||||
type DomainMatcherGroup struct {
|
type DomainMatcherGroup struct {
|
||||||
root *node
|
root *node
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DomainMatcherGroup) Add(domain string, value uint32) {
|
// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
|
||||||
|
func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
|
||||||
if g.root == nil {
|
if g.root == nil {
|
||||||
g.root = new(node)
|
g.root = new(node)
|
||||||
}
|
}
|
||||||
|
|
||||||
current := g.root
|
current := g.root
|
||||||
parts := breakDomain(domain)
|
parts := breakDomain(matcher.Pattern())
|
||||||
for i := len(parts) - 1; i >= 0; i-- {
|
for i := len(parts) - 1; i >= 0; i-- {
|
||||||
part := parts[i]
|
part := parts[i]
|
||||||
if current.sub == nil {
|
if current.sub == nil {
|
||||||
@ -40,10 +41,7 @@ func (g *DomainMatcherGroup) Add(domain string, value uint32) {
|
|||||||
current.values = append(current.values, value)
|
current.values = append(current.values, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) {
|
// Match implements MatcherGroup.Match.
|
||||||
g.Add(string(m), value)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
||||||
if domain == "" {
|
if domain == "" {
|
||||||
return nil
|
return nil
|
||||||
@ -96,3 +94,8 @@ func (g *DomainMatcherGroup) Match(domain string) []uint32 {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
func (g *DomainMatcherGroup) MatchAny(domain string) bool {
|
||||||
|
return len(g.Match(domain)) > 0
|
||||||
|
}
|
@ -8,15 +8,39 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestDomainMatcherGroup(t *testing.T) {
|
func TestDomainMatcherGroup(t *testing.T) {
|
||||||
g := new(DomainMatcherGroup)
|
patterns := []struct {
|
||||||
g.Add("v2fly.org", 1)
|
Pattern string
|
||||||
g.Add("google.com", 2)
|
Value uint32
|
||||||
g.Add("x.a.com", 3)
|
}{
|
||||||
g.Add("a.b.com", 4)
|
{
|
||||||
g.Add("c.a.b.com", 5)
|
Pattern: "v2fly.org",
|
||||||
g.Add("x.y.com", 4)
|
Value: 1,
|
||||||
g.Add("x.y.com", 6)
|
},
|
||||||
|
{
|
||||||
|
Pattern: "google.com",
|
||||||
|
Value: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.a.com",
|
||||||
|
Value: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "a.b.com",
|
||||||
|
Value: 4,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "c.a.b.com",
|
||||||
|
Value: 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.y.com",
|
||||||
|
Value: 4,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.y.com",
|
||||||
|
Value: 6,
|
||||||
|
},
|
||||||
|
}
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
Domain string
|
Domain string
|
||||||
Result []uint32
|
Result []uint32
|
||||||
@ -58,7 +82,10 @@ func TestDomainMatcherGroup(t *testing.T) {
|
|||||||
Result: []uint32{4, 6},
|
Result: []uint32{4, 6},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
g := new(DomainMatcherGroup)
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
|
||||||
|
}
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
r := g.Match(testCase.Domain)
|
r := g.Match(testCase.Domain)
|
||||||
if !reflect.DeepEqual(r, testCase.Result) {
|
if !reflect.DeepEqual(r, testCase.Result) {
|
30
common/strmatcher/matchergroup_full.go
Normal file
30
common/strmatcher/matchergroup_full.go
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
package strmatcher
|
||||||
|
|
||||||
|
// FullMatcherGroup is an implementation of MatcherGroup.
|
||||||
|
// It uses a hash table to facilitate exact match lookup.
|
||||||
|
type FullMatcherGroup struct {
|
||||||
|
matchers map[string][]uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
|
||||||
|
func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
|
||||||
|
if g.matchers == nil {
|
||||||
|
g.matchers = make(map[string][]uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
domain := matcher.Pattern()
|
||||||
|
g.matchers[domain] = append(g.matchers[domain], value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements MatcherGroup.Match.
|
||||||
|
func (g *FullMatcherGroup) Match(input string) []uint32 {
|
||||||
|
if g.matchers == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return g.matchers[input]
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.Any.
|
||||||
|
func (g *FullMatcherGroup) MatchAny(input string) bool {
|
||||||
|
return len(g.Match(input)) > 0
|
||||||
|
}
|
@ -8,13 +8,31 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestFullMatcherGroup(t *testing.T) {
|
func TestFullMatcherGroup(t *testing.T) {
|
||||||
g := new(FullMatcherGroup)
|
patterns := []struct {
|
||||||
g.Add("v2fly.org", 1)
|
Pattern string
|
||||||
g.Add("google.com", 2)
|
Value uint32
|
||||||
g.Add("x.a.com", 3)
|
}{
|
||||||
g.Add("x.y.com", 4)
|
{
|
||||||
g.Add("x.y.com", 6)
|
Pattern: "v2fly.org",
|
||||||
|
Value: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "google.com",
|
||||||
|
Value: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.a.com",
|
||||||
|
Value: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.y.com",
|
||||||
|
Value: 4,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Pattern: "x.y.com",
|
||||||
|
Value: 6,
|
||||||
|
},
|
||||||
|
}
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
Domain string
|
Domain string
|
||||||
Result []uint32
|
Result []uint32
|
||||||
@ -32,7 +50,10 @@ func TestFullMatcherGroup(t *testing.T) {
|
|||||||
Result: []uint32{4, 6},
|
Result: []uint32{4, 6},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
g := new(FullMatcherGroup)
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
|
||||||
|
}
|
||||||
for _, testCase := range testCases {
|
for _, testCase := range testCases {
|
||||||
r := g.Match(testCase.Domain)
|
r := g.Match(testCase.Domain)
|
||||||
if !reflect.DeepEqual(r, testCase.Result) {
|
if !reflect.DeepEqual(r, testCase.Result) {
|
@ -2,7 +2,6 @@ package strmatcher
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"math/bits"
|
"math/bits"
|
||||||
"regexp"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
@ -20,79 +19,44 @@ func RollingHash(s string) uint32 {
|
|||||||
return h
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
// A MphMatcherGroup is divided into three parts:
|
// MphMatcherGroup is an implementation of MatcherGroup.
|
||||||
// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
|
// It implements Rabin-Karp algorithm and minimal perfect hash table for Full and Domain matcher.
|
||||||
// 2. `substr` patterns are matched by ac automaton;
|
|
||||||
// 3. `regex` patterns are matched with the regex library.
|
|
||||||
type MphMatcherGroup struct {
|
type MphMatcherGroup struct {
|
||||||
ac *ACAutomaton
|
|
||||||
otherMatchers []matcherEntry
|
|
||||||
rules []string
|
rules []string
|
||||||
level0 []uint32
|
level0 []uint32
|
||||||
level0Mask int
|
level0Mask int
|
||||||
level1 []uint32
|
level1 []uint32
|
||||||
level1Mask int
|
level1Mask int
|
||||||
count uint32
|
|
||||||
ruleMap *map[string]uint32
|
ruleMap *map[string]uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
|
|
||||||
h := RollingHash(pattern)
|
|
||||||
switch t {
|
|
||||||
case Domain:
|
|
||||||
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
|
|
||||||
fallthrough
|
|
||||||
case Full:
|
|
||||||
(*g.ruleMap)[pattern] = h
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewMphMatcherGroup() *MphMatcherGroup {
|
func NewMphMatcherGroup() *MphMatcherGroup {
|
||||||
return &MphMatcherGroup{
|
return &MphMatcherGroup{
|
||||||
ac: nil,
|
|
||||||
otherMatchers: nil,
|
|
||||||
rules: nil,
|
rules: nil,
|
||||||
level0: nil,
|
level0: nil,
|
||||||
level0Mask: 0,
|
level0Mask: 0,
|
||||||
level1: nil,
|
level1: nil,
|
||||||
level1Mask: 0,
|
level1Mask: 0,
|
||||||
count: 1,
|
|
||||||
ruleMap: &map[string]uint32{},
|
ruleMap: &map[string]uint32{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddPattern adds a pattern to MphMatcherGroup
|
// AddFullMatcher implements MatcherGroupForFull.
|
||||||
func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
func (g *MphMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
|
||||||
switch t {
|
pattern := strings.ToLower(matcher.Pattern())
|
||||||
case Substr:
|
(*g.ruleMap)[pattern] = RollingHash(pattern)
|
||||||
if g.ac == nil {
|
|
||||||
g.ac = NewACAutomaton()
|
|
||||||
}
|
|
||||||
g.ac.Add(pattern, t)
|
|
||||||
case Full, Domain:
|
|
||||||
pattern = strings.ToLower(pattern)
|
|
||||||
g.AddFullOrDomainPattern(pattern, t)
|
|
||||||
case Regex:
|
|
||||||
r, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
|
||||||
m: ®exMatcher{pattern: r},
|
|
||||||
id: g.count,
|
|
||||||
})
|
|
||||||
default:
|
|
||||||
panic("Unknown type")
|
|
||||||
}
|
|
||||||
return g.count, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build builds a minimal perfect hash table and ac automaton from insert rules
|
// AddDomainMatcher implements MatcherGroupForDomain.
|
||||||
|
func (g *MphMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
|
||||||
|
pattern := strings.ToLower(matcher.Pattern())
|
||||||
|
h := RollingHash(pattern)
|
||||||
|
(*g.ruleMap)[pattern] = h
|
||||||
|
(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build builds a minimal perfect hash table for the inserted rules.
|
||||||
func (g *MphMatcherGroup) Build() {
|
func (g *MphMatcherGroup) Build() {
|
||||||
if g.ac != nil {
|
|
||||||
g.ac.Build()
|
|
||||||
}
|
|
||||||
keyLen := len(*g.ruleMap)
|
keyLen := len(*g.ruleMap)
|
||||||
if keyLen == 0 {
|
if keyLen == 0 {
|
||||||
keyLen = 1
|
keyLen = 1
|
||||||
@ -127,7 +91,7 @@ func (g *MphMatcherGroup) Build() {
|
|||||||
findSeed := true
|
findSeed := true
|
||||||
tmpOcc = tmpOcc[:0]
|
tmpOcc = tmpOcc[:0]
|
||||||
for _, i := range bucket.vals {
|
for _, i := range bucket.vals {
|
||||||
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask
|
n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask // nosemgrep
|
||||||
if occ[n] {
|
if occ[n] {
|
||||||
for _, n := range tmpOcc {
|
for _, n := range tmpOcc {
|
||||||
occ[n] = false
|
occ[n] = false
|
||||||
@ -148,6 +112,34 @@ func (g *MphMatcherGroup) Build() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lookup searches for s in t and returns its index and whether it was found.
|
||||||
|
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
|
||||||
|
i0 := int(h) & g.level0Mask
|
||||||
|
seed := g.level0[i0]
|
||||||
|
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask // nosemgrep
|
||||||
|
n := g.level1[i1]
|
||||||
|
return s == g.rules[int(n)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements MatcherGroup.Match.
|
||||||
|
func (*MphMatcherGroup) Match(_ string) []uint32 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
func (g *MphMatcherGroup) MatchAny(pattern string) bool {
|
||||||
|
hash := uint32(0)
|
||||||
|
for i := len(pattern) - 1; i >= 0; i-- {
|
||||||
|
hash = hash*PrimeRK + uint32(pattern[i])
|
||||||
|
if pattern[i] == '.' {
|
||||||
|
if g.Lookup(hash, pattern[i:]) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return g.Lookup(hash, pattern)
|
||||||
|
}
|
||||||
|
|
||||||
func nextPow2(v int) int {
|
func nextPow2(v int) int {
|
||||||
if v <= 1 {
|
if v <= 1 {
|
||||||
return 1
|
return 1
|
||||||
@ -157,45 +149,6 @@ func nextPow2(v int) int {
|
|||||||
return int(n)
|
return int(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lookup searches for s in t and returns its index and whether it was found.
|
|
||||||
func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
|
|
||||||
i0 := int(h) & g.level0Mask
|
|
||||||
seed := g.level0[i0]
|
|
||||||
i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask
|
|
||||||
n := g.level1[i1]
|
|
||||||
return s == g.rules[int(n)]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Match implements IndexMatcher.Match.
|
|
||||||
func (g *MphMatcherGroup) Match(pattern string) []uint32 {
|
|
||||||
result := []uint32{}
|
|
||||||
hash := uint32(0)
|
|
||||||
for i := len(pattern) - 1; i >= 0; i-- {
|
|
||||||
hash = hash*PrimeRK + uint32(pattern[i])
|
|
||||||
if pattern[i] == '.' {
|
|
||||||
if g.Lookup(hash, pattern[i:]) {
|
|
||||||
result = append(result, 1)
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if g.Lookup(hash, pattern) {
|
|
||||||
result = append(result, 1)
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
if g.ac != nil && g.ac.Match(pattern) {
|
|
||||||
result = append(result, 1)
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
for _, e := range g.otherMatchers {
|
|
||||||
if e.m.Match(pattern) {
|
|
||||||
result = append(result, e.id)
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type indexBucket struct {
|
type indexBucket struct {
|
||||||
n int
|
n int
|
||||||
vals []int
|
vals []int
|
||||||
@ -286,7 +239,7 @@ tail:
|
|||||||
}
|
}
|
||||||
|
|
||||||
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
|
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
|
||||||
return unsafe.Pointer(uintptr(p) + x)
|
return unsafe.Pointer(uintptr(p) + x) // nosemgrep
|
||||||
}
|
}
|
||||||
|
|
||||||
func readUnaligned32(p unsafe.Pointer) uint32 {
|
func readUnaligned32(p unsafe.Pointer) uint32 {
|
174
common/strmatcher/matchergroup_mph_test.go
Normal file
174
common/strmatcher/matchergroup_mph_test.go
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v4/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMphMatcherGroup(t *testing.T) {
|
||||||
|
cases1 := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
input string
|
||||||
|
output bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "www.v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "www.v3fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
input: "xv2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Full,
|
||||||
|
input: "v2fly.org",
|
||||||
|
output: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Full,
|
||||||
|
input: "xv2fly.org",
|
||||||
|
output: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases1 {
|
||||||
|
mph := NewMphMatcherGroup()
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||||
|
mph.Build()
|
||||||
|
if m := mph.MatchAny(test.input); m != test.output {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
cases2Input := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "163.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.126.com",
|
||||||
|
mType: Full,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "3.com",
|
||||||
|
mType: Full,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
mph := NewMphMatcherGroup()
|
||||||
|
for _, test := range cases2Input {
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||||
|
}
|
||||||
|
mph.Build()
|
||||||
|
cases2Output := []struct {
|
||||||
|
pattern string
|
||||||
|
res bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "126.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.163.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "mm163.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "m.126.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "163.com",
|
||||||
|
res: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "63.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "oogle.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "vvgoogle.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases2Output {
|
||||||
|
if m := mph.MatchAny(test.pattern); m != test.res {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
cases3Input := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "video.google.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "gle.com",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
mph := NewMphMatcherGroup()
|
||||||
|
for _, test := range cases3Input {
|
||||||
|
matcher, err := test.mType.New(test.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(mph, matcher, 0))
|
||||||
|
}
|
||||||
|
mph.Build()
|
||||||
|
cases3Output := []struct {
|
||||||
|
pattern string
|
||||||
|
res bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "google.com",
|
||||||
|
res: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, test := range cases3Output {
|
||||||
|
if m := mph.MatchAny(test.pattern); m != test.res {
|
||||||
|
t.Error("unexpected output: ", m, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
36
common/strmatcher/matchergroup_simple.go
Normal file
36
common/strmatcher/matchergroup_simple.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
package strmatcher
|
||||||
|
|
||||||
|
type matcherEntry struct {
|
||||||
|
matcher Matcher
|
||||||
|
value uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// SimpleMatcherGroup is an implementation of MatcherGroup.
|
||||||
|
// It simply stores all matchers in an array and sequentially matches them.
|
||||||
|
type SimpleMatcherGroup struct {
|
||||||
|
matchers []matcherEntry
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddMatcher implements MatcherGroupForAll.AddMatcher.
|
||||||
|
func (g *SimpleMatcherGroup) AddMatcher(matcher Matcher, value uint32) {
|
||||||
|
g.matchers = append(g.matchers, matcherEntry{
|
||||||
|
matcher: matcher,
|
||||||
|
value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements MatcherGroup.Match.
|
||||||
|
func (g *SimpleMatcherGroup) Match(input string) []uint32 {
|
||||||
|
result := []uint32{}
|
||||||
|
for _, e := range g.matchers {
|
||||||
|
if e.matcher.Match(input) {
|
||||||
|
result = append(result, e.value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
func (g *SimpleMatcherGroup) MatchAny(input string) bool {
|
||||||
|
return len(g.Match(input)) > 0
|
||||||
|
}
|
69
common/strmatcher/matchergroup_simple_test.go
Normal file
69
common/strmatcher/matchergroup_simple_test.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v4/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSimpleMatcherGroup(t *testing.T) {
|
||||||
|
patterns := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Domain,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Full,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "v2fly.org",
|
||||||
|
mType: Regex,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cases := []struct {
|
||||||
|
input string
|
||||||
|
output []uint32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
input: "www.v2fly.org",
|
||||||
|
output: []uint32{0, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "v2fly.org",
|
||||||
|
output: []uint32{0, 1, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "www.v3fly.org",
|
||||||
|
output: []uint32{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "2fly.org",
|
||||||
|
output: []uint32{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "xv2fly.org",
|
||||||
|
output: []uint32{2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "v2flyxorg",
|
||||||
|
output: []uint32{2},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
matcherGroup := &SimpleMatcherGroup{}
|
||||||
|
for id, entry := range patterns {
|
||||||
|
matcher, err := entry.mType.New(entry.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
|
||||||
|
}
|
||||||
|
for _, test := range cases {
|
||||||
|
if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
|
||||||
|
t.Error("unexpected output: ", r, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
47
common/strmatcher/matchergroup_substr.go
Normal file
47
common/strmatcher/matchergroup_substr.go
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
package strmatcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SubstrMatcherGroup is implementation of MatcherGroup,
|
||||||
|
// It is simply implmeneted to comply with the priority specification of Substr matchers.
|
||||||
|
type SubstrMatcherGroup struct {
|
||||||
|
patterns []string
|
||||||
|
values []uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
|
||||||
|
func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint32) {
|
||||||
|
g.patterns = append(g.patterns, matcher.Pattern())
|
||||||
|
g.values = append(g.values, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match implements MatcherGroup.Match.
|
||||||
|
func (g *SubstrMatcherGroup) Match(input string) []uint32 {
|
||||||
|
result := []uint32{}
|
||||||
|
for i, pattern := range g.patterns {
|
||||||
|
for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
|
||||||
|
result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Sort the match results in dictionary order, so that:
|
||||||
|
// 1. Pattern matched at smaller position (meaning matched further) takes precedence.
|
||||||
|
// 2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
|
||||||
|
sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
|
||||||
|
for i, entry := range result {
|
||||||
|
result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny implements MatcherGroup.MatchAny.
|
||||||
|
func (g *SubstrMatcherGroup) MatchAny(input string) bool {
|
||||||
|
for _, pattern := range g.patterns {
|
||||||
|
if strings.Contains(input, pattern) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
65
common/strmatcher/matchergroup_substr_test.go
Normal file
65
common/strmatcher/matchergroup_substr_test.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
package strmatcher_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/v2fly/v2ray-core/v4/common"
|
||||||
|
. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSubstrMatcherGroup(t *testing.T) {
|
||||||
|
patterns := []struct {
|
||||||
|
pattern string
|
||||||
|
mType Type
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "apis",
|
||||||
|
mType: Substr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "google",
|
||||||
|
mType: Substr,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "apis",
|
||||||
|
mType: Substr,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cases := []struct {
|
||||||
|
input string
|
||||||
|
output []uint32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
input: "google.com",
|
||||||
|
output: []uint32{1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "apis.com",
|
||||||
|
output: []uint32{0, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "googleapis.com",
|
||||||
|
output: []uint32{1, 0, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "fonts.googleapis.com",
|
||||||
|
output: []uint32{1, 0, 2},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
input: "apis.googleapis.com",
|
||||||
|
output: []uint32{0, 2, 1, 0, 2},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
matcherGroup := &SubstrMatcherGroup{}
|
||||||
|
for id, entry := range patterns {
|
||||||
|
matcher, err := entry.mType.New(entry.pattern)
|
||||||
|
common.Must(err)
|
||||||
|
common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
|
||||||
|
}
|
||||||
|
for _, test := range cases {
|
||||||
|
if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
|
||||||
|
t.Error("unexpected output: ", r, " for test case ", test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,52 +1,167 @@
|
|||||||
package strmatcher
|
package strmatcher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type fullMatcher string
|
// FullMatcher is an implementation of Matcher.
|
||||||
|
type FullMatcher string
|
||||||
|
|
||||||
func (m fullMatcher) Match(s string) bool {
|
func (FullMatcher) Type() Type {
|
||||||
|
return Full
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m FullMatcher) Pattern() string {
|
||||||
|
return string(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m FullMatcher) String() string {
|
||||||
|
return "full:" + m.Pattern()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m FullMatcher) Match(s string) bool {
|
||||||
return string(m) == s
|
return string(m) == s
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m fullMatcher) String() string {
|
// DomainMatcher is an implementation of Matcher.
|
||||||
return "full:" + string(m)
|
type DomainMatcher string
|
||||||
|
|
||||||
|
func (DomainMatcher) Type() Type {
|
||||||
|
return Domain
|
||||||
}
|
}
|
||||||
|
|
||||||
type substrMatcher string
|
func (m DomainMatcher) Pattern() string {
|
||||||
|
return string(m)
|
||||||
func (m substrMatcher) Match(s string) bool {
|
|
||||||
return strings.Contains(s, string(m))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m substrMatcher) String() string {
|
func (m DomainMatcher) String() string {
|
||||||
return "keyword:" + string(m)
|
return "domain:" + m.Pattern()
|
||||||
}
|
}
|
||||||
|
|
||||||
type domainMatcher string
|
func (m DomainMatcher) Match(s string) bool {
|
||||||
|
pattern := m.Pattern()
|
||||||
func (m domainMatcher) Match(s string) bool {
|
|
||||||
pattern := string(m)
|
|
||||||
if !strings.HasSuffix(s, pattern) {
|
if !strings.HasSuffix(s, pattern) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
|
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m domainMatcher) String() string {
|
// SubstrMatcher is an implementation of Matcher.
|
||||||
return "domain:" + string(m)
|
type SubstrMatcher string
|
||||||
|
|
||||||
|
func (SubstrMatcher) Type() Type {
|
||||||
|
return Substr
|
||||||
}
|
}
|
||||||
|
|
||||||
type regexMatcher struct {
|
func (m SubstrMatcher) Pattern() string {
|
||||||
|
return string(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m SubstrMatcher) String() string {
|
||||||
|
return "keyword:" + m.Pattern()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m SubstrMatcher) Match(s string) bool {
|
||||||
|
return strings.Contains(s, m.Pattern())
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegexMatcher is an implementation of Matcher.
|
||||||
|
type RegexMatcher struct {
|
||||||
pattern *regexp.Regexp
|
pattern *regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *regexMatcher) Match(s string) bool {
|
func (*RegexMatcher) Type() Type {
|
||||||
|
return Regex
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *RegexMatcher) Pattern() string {
|
||||||
|
return m.pattern.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *RegexMatcher) String() string {
|
||||||
|
return "regexp:" + m.Pattern()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *RegexMatcher) Match(s string) bool {
|
||||||
return m.pattern.MatchString(s)
|
return m.pattern.MatchString(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *regexMatcher) String() string {
|
// New creates a new Matcher based on the given pattern.
|
||||||
return "regexp:" + m.pattern.String()
|
func (t Type) New(pattern string) (Matcher, error) {
|
||||||
|
switch t {
|
||||||
|
case Full:
|
||||||
|
return FullMatcher(pattern), nil
|
||||||
|
case Substr:
|
||||||
|
return SubstrMatcher(pattern), nil
|
||||||
|
case Domain:
|
||||||
|
return DomainMatcher(pattern), nil
|
||||||
|
case Regex: // 1. regex matching is case-sensitive
|
||||||
|
regex, err := regexp.Compile(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &RegexMatcher{pattern: regex}, nil
|
||||||
|
default:
|
||||||
|
panic("Unknown type")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
||||||
|
type MatcherGroupForAll interface {
|
||||||
|
AddMatcher(matcher Matcher, value uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
|
||||||
|
type MatcherGroupForFull interface {
|
||||||
|
AddFullMatcher(matcher FullMatcher, value uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
|
||||||
|
type MatcherGroupForDomain interface {
|
||||||
|
AddDomainMatcher(matcher DomainMatcher, value uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
|
||||||
|
type MatcherGroupForSubstr interface {
|
||||||
|
AddSubstrMatcher(matcher SubstrMatcher, value uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
|
||||||
|
type MatcherGroupForRegex interface {
|
||||||
|
AddRegexMatcher(matcher *RegexMatcher, value uint32)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
|
||||||
|
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
||||||
|
// This function is provided to help write code to test a MatcherGroup.
|
||||||
|
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
||||||
|
if g, ok := g.(MatcherGroupForAll); ok {
|
||||||
|
g.AddMatcher(matcher, value)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
switch matcher := matcher.(type) {
|
||||||
|
case FullMatcher:
|
||||||
|
if g, ok := g.(MatcherGroupForFull); ok {
|
||||||
|
g.AddFullMatcher(matcher, value)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case DomainMatcher:
|
||||||
|
if g, ok := g.(MatcherGroupForDomain); ok {
|
||||||
|
g.AddDomainMatcher(matcher, value)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case SubstrMatcher:
|
||||||
|
if g, ok := g.(MatcherGroupForSubstr); ok {
|
||||||
|
g.AddSubstrMatcher(matcher, value)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case *RegexMatcher:
|
||||||
|
if g, ok := g.(MatcherGroupForRegex); ok {
|
||||||
|
g.AddRegexMatcher(matcher, value)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return errors.New("cannot add matcher to matcher group")
|
||||||
}
|
}
|
||||||
|
@ -71,172 +71,3 @@ func TestMatcher(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestACAutomaton(t *testing.T) {
|
|
||||||
cases1 := []struct {
|
|
||||||
pattern string
|
|
||||||
mType Type
|
|
||||||
input string
|
|
||||||
output bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Domain,
|
|
||||||
input: "www.v2fly.org",
|
|
||||||
output: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Domain,
|
|
||||||
input: "v2fly.org",
|
|
||||||
output: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Domain,
|
|
||||||
input: "www.v3fly.org",
|
|
||||||
output: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Domain,
|
|
||||||
input: "2fly.org",
|
|
||||||
output: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Domain,
|
|
||||||
input: "xv2fly.org",
|
|
||||||
output: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Full,
|
|
||||||
input: "v2fly.org",
|
|
||||||
output: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "v2fly.org",
|
|
||||||
mType: Full,
|
|
||||||
input: "xv2fly.org",
|
|
||||||
output: false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, test := range cases1 {
|
|
||||||
ac := NewACAutomaton()
|
|
||||||
ac.Add(test.pattern, test.mType)
|
|
||||||
ac.Build()
|
|
||||||
if m := ac.Match(test.input); m != test.output {
|
|
||||||
t.Error("unexpected output: ", m, " for test case ", test)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
cases2Input := []struct {
|
|
||||||
pattern string
|
|
||||||
mType Type
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
pattern: "163.com",
|
|
||||||
mType: Domain,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "m.126.com",
|
|
||||||
mType: Full,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "3.com",
|
|
||||||
mType: Full,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "google.com",
|
|
||||||
mType: Substr,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "vgoogle.com",
|
|
||||||
mType: Substr,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
ac := NewACAutomaton()
|
|
||||||
for _, test := range cases2Input {
|
|
||||||
ac.Add(test.pattern, test.mType)
|
|
||||||
}
|
|
||||||
ac.Build()
|
|
||||||
cases2Output := []struct {
|
|
||||||
pattern string
|
|
||||||
res bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
pattern: "126.com",
|
|
||||||
res: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "m.163.com",
|
|
||||||
res: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "mm163.com",
|
|
||||||
res: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "m.126.com",
|
|
||||||
res: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "163.com",
|
|
||||||
res: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "63.com",
|
|
||||||
res: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "oogle.com",
|
|
||||||
res: false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "vvgoogle.com",
|
|
||||||
res: true,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, test := range cases2Output {
|
|
||||||
if m := ac.Match(test.pattern); m != test.res {
|
|
||||||
t.Error("unexpected output: ", m, " for test case ", test)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
cases3Input := []struct {
|
|
||||||
pattern string
|
|
||||||
mType Type
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
pattern: "video.google.com",
|
|
||||||
mType: Domain,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
pattern: "gle.com",
|
|
||||||
mType: Domain,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
ac := NewACAutomaton()
|
|
||||||
for _, test := range cases3Input {
|
|
||||||
ac.Add(test.pattern, test.mType)
|
|
||||||
}
|
|
||||||
ac.Build()
|
|
||||||
cases3Output := []struct {
|
|
||||||
pattern string
|
|
||||||
res bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
pattern: "google.com",
|
|
||||||
res: false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, test := range cases3Output {
|
|
||||||
if m := ac.Match(test.pattern); m != test.res {
|
|
||||||
t.Error("unexpected output: ", m, " for test case ", test)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,107 +1,74 @@
|
|||||||
package strmatcher
|
package strmatcher
|
||||||
|
|
||||||
import (
|
|
||||||
"regexp"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Matcher is the interface to determine a string matches a pattern.
|
|
||||||
type Matcher interface {
|
|
||||||
// Match returns true if the given string matches a predefined pattern.
|
|
||||||
Match(string) bool
|
|
||||||
String() string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Type is the type of the matcher.
|
// Type is the type of the matcher.
|
||||||
type Type byte
|
type Type byte
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// Full is the type of matcher that the input string must exactly equal to the pattern.
|
// Full is the type of matcher that the input string must exactly equal to the pattern.
|
||||||
Full Type = iota
|
Full Type = 0
|
||||||
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
|
|
||||||
Substr
|
|
||||||
// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
|
// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
|
||||||
Domain
|
Domain Type = 1
|
||||||
|
// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
|
||||||
|
Substr Type = 2
|
||||||
// Regex is the type of matcher that the input string must match the regular-expression pattern.
|
// Regex is the type of matcher that the input string must match the regular-expression pattern.
|
||||||
Regex
|
Regex Type = 3
|
||||||
)
|
)
|
||||||
|
|
||||||
// New creates a new Matcher based on the given pattern.
|
// Matcher is the interface to determine a string matches a pattern.
|
||||||
func (t Type) New(pattern string) (Matcher, error) {
|
// * This is a basic matcher to represent a certain kind of match semantic(full, substr, domain or regex).
|
||||||
// 1. regex matching is case-sensitive
|
type Matcher interface {
|
||||||
switch t {
|
// Type returns the matcher's type.
|
||||||
case Full:
|
Type() Type
|
||||||
return fullMatcher(pattern), nil
|
|
||||||
case Substr:
|
// Pattern returns the matcher's raw string representation.
|
||||||
return substrMatcher(pattern), nil
|
Pattern() string
|
||||||
case Domain:
|
|
||||||
return domainMatcher(pattern), nil
|
// String returns a string representation of the matcher containing its type and pattern.
|
||||||
case Regex:
|
String() string
|
||||||
r, err := regexp.Compile(pattern)
|
|
||||||
if err != nil {
|
// Match returns true if the given string matches a predefined pattern.
|
||||||
return nil, err
|
// * This method is seldom used for performance reason
|
||||||
}
|
// and is generally taken over by their corresponding MatcherGroup.
|
||||||
return &regexMatcher{
|
Match(input string) bool
|
||||||
pattern: r,
|
|
||||||
}, nil
|
|
||||||
default:
|
|
||||||
panic("Unknown type")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IndexMatcher is the interface for matching with a group of matchers.
|
// MatcherGroup is an advanced type of matcher to accept a bunch of basic Matchers (of certain type, not all matcher types).
|
||||||
type IndexMatcher interface {
|
// For example:
|
||||||
// Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists.
|
// * FullMatcherGroup accepts FullMatcher and uses a hash table to facilitate lookup.
|
||||||
|
// * DomainMatcherGroup accepts DomainMatcher and uses a trie to optimize both memory consumption and lookup speed.
|
||||||
|
type MatcherGroup interface {
|
||||||
|
// Match returns all matched matchers with their corresponding values.
|
||||||
Match(input string) []uint32
|
Match(input string) []uint32
|
||||||
|
|
||||||
|
// MatchAny returns true as soon as one matching matcher is found.
|
||||||
|
MatchAny(input string) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type matcherEntry struct {
|
// IndexMatcher is a general type of matcher that accepts all kinds of basic matchers.
|
||||||
m Matcher
|
// It should:
|
||||||
id uint32
|
// * Accept all Matcher types with no exception.
|
||||||
}
|
// * Optimize string matching with a combination of MatcherGroups.
|
||||||
|
// * Obey certain priority order specification when returning matched Matchers.
|
||||||
// MatcherGroup is an implementation of IndexMatcher.
|
type IndexMatcher interface {
|
||||||
// Empty initialization works.
|
// Size returns the number of matchers added to IndexMatcher.
|
||||||
type MatcherGroup struct {
|
Size() uint32
|
||||||
count uint32
|
|
||||||
fullMatcher FullMatcherGroup
|
// Add adds a new Matcher to IndexMatcher, and returns its index. The index will never be 0.
|
||||||
domainMatcher DomainMatcherGroup
|
Add(matcher Matcher) uint32
|
||||||
otherMatchers []matcherEntry
|
|
||||||
}
|
// Build builds the IndexMatcher to be ready for matching.
|
||||||
|
Build() error
|
||||||
// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
|
|
||||||
func (g *MatcherGroup) Add(m Matcher) uint32 {
|
// Match returns the indices of all matchers that match the input.
|
||||||
g.count++
|
// * Empty array is returned if no such matcher exists.
|
||||||
c := g.count
|
// * The order of returned matchers should follow priority specification.
|
||||||
|
// Priority specification:
|
||||||
switch tm := m.(type) {
|
// 1. Priority between matcher types: full > domain > substr > regex.
|
||||||
case fullMatcher:
|
// 2. Priority of same-priority matchers matching at same position: the early added takes precedence.
|
||||||
g.fullMatcher.addMatcher(tm, c)
|
// 3. Priority of domain matchers matching at different levels: the further matched domain takes precedence.
|
||||||
case domainMatcher:
|
// 4. Priority of substr matchers matching at different positions: the further matched substr takes precedence.
|
||||||
g.domainMatcher.addMatcher(tm, c)
|
Match(input string) []uint32
|
||||||
default:
|
|
||||||
g.otherMatchers = append(g.otherMatchers, matcherEntry{
|
// MatchAny returns true as soon as one matching matcher is found.
|
||||||
m: m,
|
MatchAny(input string) bool
|
||||||
id: c,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return c
|
|
||||||
}
|
|
||||||
|
|
||||||
// Match implements IndexMatcher.Match.
|
|
||||||
func (g *MatcherGroup) Match(pattern string) []uint32 {
|
|
||||||
result := []uint32{}
|
|
||||||
result = append(result, g.fullMatcher.Match(pattern)...)
|
|
||||||
result = append(result, g.domainMatcher.Match(pattern)...)
|
|
||||||
for _, e := range g.otherMatchers {
|
|
||||||
if e.m.Match(pattern) {
|
|
||||||
result = append(result, e.id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// Size returns the number of matchers in the MatcherGroup.
|
|
||||||
func (g *MatcherGroup) Size() uint32 {
|
|
||||||
return g.count
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user