1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2025-01-20 08:16:55 -05:00
v2fly/common/strmatcher/strmatcher.go

201 lines
4.6 KiB
Go
Raw Normal View History

2018-06-26 15:57:41 -04:00
package strmatcher
2018-08-19 15:04:15 -04:00
import (
	"errors"
	"regexp"
)
2018-06-26 15:57:41 -04:00
// PrimeRK is the prime base used in Rabin-Karp algorithm.
// In this file it is the multiplier of the uint32 rolling hash that
// AddFullOrDomainPattern and ACAutomatonMatcherGroup.Match compute over the
// bytes of a string, last byte first.
const PrimeRK = 16777619
2018-08-20 09:39:58 -04:00
// Matcher is the interface to determine whether a string matches a pattern.
type Matcher interface {
	// Match returns true if the given string matches a predefined pattern.
	Match(string) bool
	// String returns a textual form of the matcher.
	// NOTE(review): the exact format is up to each implementation; none of
	// them is visible from this declaration.
	String() string
}
2018-08-20 09:39:58 -04:00
// Type is the type of the matcher.
type Type byte

const (
	// Full is the type of matcher whose input string must be exactly equal to the pattern.
	Full Type = iota
	// Substr is the type of matcher whose input string must contain the pattern as a sub-string.
	Substr
	// Domain is the type of matcher whose input string must be the pattern itself or a sub-domain of it.
	Domain
	// Regex is the type of matcher whose input string must match the regular-expression pattern.
	Regex
)
2018-08-20 09:39:58 -04:00
// New creates a new Matcher based on the given pattern.
2018-06-26 15:57:41 -04:00
func (t Type) New(pattern string) (Matcher, error) {
switch t {
case Full:
return fullMatcher(pattern), nil
case Substr:
return substrMatcher(pattern), nil
case Domain:
return domainMatcher(pattern), nil
case Regex:
r, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return &regexMatcher{
pattern: r,
}, nil
default:
panic("Unknown type")
}
}
2018-08-20 09:39:58 -04:00
// IndexMatcher is the interface for matching with a group of matchers.
type IndexMatcher interface {
	// Match returns the indices of the matchers that match the input. It
	// returns an empty slice if no such matcher exists.
	Match(input string) []uint32
}
2018-06-26 15:57:41 -04:00
// matcherEntry pairs a Matcher with the id that the groups in this file
// append to their Match result when that matcher accepts the input.
type matcherEntry struct {
// m is the matcher consulted through m.Match.
m Matcher
// id is the value reported for m.
id uint32
}
// ACAutomatonMatcherGroup is a group of patterns queried through Match.
// Full and Domain patterns are kept in a hash map, Substr patterns in an
// ACAutomaton, and Regex patterns in a list of compiled matchers.
type ACAutomatonMatcherGroup struct {
// count is set to 1 by NewACAutomatonMatcherGroup and incremented for each
// Regex pattern added; AddPattern returns its current value.
count uint32
// ac receives the Substr patterns; it stays nil until the first one is added.
ac *ACAutomaton
// nonSubstrMap maps the rolling hash of a Full or Domain pattern (and, for
// Domain, of "." + pattern) to the hashed text. Match compares the stored
// text with the input to rule out hash collisions.
nonSubstrMap map[uint32]string
// otherMatchers holds the compiled Regex matchers together with their ids.
otherMatchers []matcherEntry
}
// NewACAutomatonMatcherGroup returns a group with its counter set to 1 and an
// empty, non-nil hash map for Full and Domain patterns. The AC automaton and
// the regex list are left at their zero values.
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
	return &ACAutomatonMatcherGroup{
		count:        1,
		nonSubstrMap: make(map[uint32]string),
	}
}
// AddFullOrDomainPattern stores a Full or Domain pattern in the hash map.
//
// The key is a uint32 rolling hash of the pattern's bytes taken from the last
// byte to the first, with PrimeRK as multiplier. A Full pattern is stored
// under its own hash. A Domain pattern is additionally stored as "." + pattern
// under the hash extended by one '.' byte, which is the entry Match finds when
// the input is a sub-domain. Any other type is ignored. An existing entry with
// the same hash is overwritten.
func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
	var sum uint32
	for n := len(pattern); n > 0; n-- {
		sum = sum*PrimeRK + uint32(pattern[n-1])
	}

	if t != Full && t != Domain {
		return
	}

	g.nonSubstrMap[sum] = pattern
	if t == Domain {
		// Extending the hash by '.' yields the hash of "." + pattern.
		g.nonSubstrMap[sum*PrimeRK+uint32('.')] = "." + pattern
	}
}
// AddPattern registers pattern under matcher type t and returns the group's
// current counter value.
//
//   - Substr patterns are added to the AC automaton, which is created on first use.
//   - Full and Domain patterns are stored through AddFullOrDomainPattern.
//   - Regex patterns are compiled and stored under a freshly incremented id.
//
// The regular expression is compiled before the counter is incremented, so a
// pattern that fails to compile leaves the group unchanged. A compile failure
// or an unrecognized type is reported as (0, err) instead of panicking.
func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
	switch t {
	case Substr:
		if g.ac == nil {
			g.ac = NewACAutomaton()
		}
		g.ac.Add(pattern, t)
	case Full, Domain:
		g.AddFullOrDomainPattern(pattern, t)
	case Regex:
		r, err := regexp.Compile(pattern)
		if err != nil {
			return 0, err
		}
		// Only consume an id once the pattern is known to be valid.
		g.count++
		g.otherMatchers = append(g.otherMatchers, matcherEntry{
			m:  &regexMatcher{pattern: r},
			id: g.count,
		})
	default:
		return 0, errors.New("unknown matcher type")
	}
	return g.count, nil
}
// Build calls Build on the AC automaton when one has been created; it does
// nothing if no Substr pattern was ever added.
func (g *ACAutomatonMatcherGroup) Build() {
	if g.ac == nil {
		return
	}
	g.ac.Build()
}
// Match implements IndexMatcher.Match.
//
// The input is checked in this order and the first hit ends the search:
//  1. every suffix of the input that starts with '.' against the hash map
//     (sub-domain of a Domain pattern),
//  2. the whole input against the hash map (Full or Domain pattern),
//  3. the AC automaton, if one exists (Substr patterns),
//  4. the regex matchers, in the order they were added.
//
// Cases 1-3 report the fixed value 1; a regex hit reports that matcher's id.
// With no hit the result is an empty, non-nil slice.
func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
	matched := []uint32{}

	// Rolling hash from the last byte to the first, so that after consuming
	// byte pos-1 the value is the hash of the suffix pattern[pos-1:].
	var sum uint32
	for pos := len(pattern); pos > 0; pos-- {
		c := pattern[pos-1]
		sum = sum*PrimeRK + uint32(c)
		if c != '.' {
			continue
		}
		// Compare the stored text as well, to reject hash collisions.
		if stored, found := g.nonSubstrMap[sum]; found && stored == pattern[pos-1:] {
			return append(matched, 1)
		}
	}

	if stored, found := g.nonSubstrMap[sum]; found && stored == pattern {
		return append(matched, 1)
	}

	if g.ac != nil && g.ac.Match(pattern) {
		return append(matched, 1)
	}

	for _, entry := range g.otherMatchers {
		if entry.m.Match(pattern) {
			return append(matched, entry.id)
		}
	}
	return matched
}
2018-08-20 09:39:58 -04:00
// MatcherGroup is an implementation of IndexMatcher.
// Empty initialization works.
2018-06-26 15:57:41 -04:00
type MatcherGroup struct {
count uint32
2018-08-20 03:57:06 -04:00
fullMatcher FullMatcherGroup
2018-08-19 15:04:15 -04:00
domainMatcher DomainMatcherGroup
2018-06-26 15:57:41 -04:00
otherMatchers []matcherEntry
}
2018-08-20 09:39:58 -04:00
// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
2018-06-26 15:57:41 -04:00
func (g *MatcherGroup) Add(m Matcher) uint32 {
g.count++
2018-08-20 03:57:06 -04:00
c := g.count
2018-06-26 15:57:41 -04:00
2018-08-19 15:04:15 -04:00
switch tm := m.(type) {
case fullMatcher:
2018-08-20 03:57:06 -04:00
g.fullMatcher.addMatcher(tm, c)
2018-08-19 15:04:15 -04:00
case domainMatcher:
2018-08-20 03:57:06 -04:00
g.domainMatcher.addMatcher(tm, c)
2018-08-19 15:04:15 -04:00
default:
2018-06-26 15:57:41 -04:00
g.otherMatchers = append(g.otherMatchers, matcherEntry{
m: m,
id: c,
})
}
return c
}
2018-08-20 09:39:58 -04:00
// Match implements IndexMatcher.Match.
func (g *MatcherGroup) Match(pattern string) []uint32 {
result := []uint32{}
result = append(result, g.fullMatcher.Match(pattern)...)
result = append(result, g.domainMatcher.Match(pattern)...)
2018-06-26 15:57:41 -04:00
for _, e := range g.otherMatchers {
if e.m.Match(pattern) {
result = append(result, e.id)
2018-06-26 15:57:41 -04:00
}
}
return result
2018-06-26 15:57:41 -04:00
}
2018-08-20 09:39:58 -04:00
// Size returns the number of matchers in the MatcherGroup.
2018-06-26 15:57:41 -04:00
func (g *MatcherGroup) Size() uint32 {
return g.count
}