1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2024-12-31 14:36:50 -05:00
v2fly/common/strmatcher/matchers.go
Vigilans 32475d9239
[app/dns] Add tests for idn support (#2213)
Co-authored-by: kallydev <kallydev@gmail.com>

Co-authored-by: kallydev <kallydev@gmail.com>
2022-12-15 10:26:53 +08:00

291 lines
7.2 KiB
Go

package strmatcher
import (
"errors"
"regexp"
"strings"
"unicode/utf8"
"golang.org/x/net/idna"
)
// FullMatcher is an implementation of Matcher.
type FullMatcher string
func (FullMatcher) Type() Type {
return Full
}
func (m FullMatcher) Pattern() string {
return string(m)
}
func (m FullMatcher) String() string {
return "full:" + m.Pattern()
}
func (m FullMatcher) Match(s string) bool {
return string(m) == s
}
// DomainMatcher is an implementation of Matcher.
type DomainMatcher string
func (DomainMatcher) Type() Type {
return Domain
}
func (m DomainMatcher) Pattern() string {
return string(m)
}
func (m DomainMatcher) String() string {
return "domain:" + m.Pattern()
}
func (m DomainMatcher) Match(s string) bool {
pattern := m.Pattern()
if !strings.HasSuffix(s, pattern) {
return false
}
return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
}
// SubstrMatcher is an implementation of Matcher.
type SubstrMatcher string
func (SubstrMatcher) Type() Type {
return Substr
}
func (m SubstrMatcher) Pattern() string {
return string(m)
}
func (m SubstrMatcher) String() string {
return "keyword:" + m.Pattern()
}
func (m SubstrMatcher) Match(s string) bool {
return strings.Contains(s, m.Pattern())
}
// RegexMatcher is an implementation of Matcher.
type RegexMatcher struct {
pattern *regexp.Regexp
}
func (*RegexMatcher) Type() Type {
return Regex
}
func (m *RegexMatcher) Pattern() string {
return m.pattern.String()
}
func (m *RegexMatcher) String() string {
return "regexp:" + m.Pattern()
}
func (m *RegexMatcher) Match(s string) bool {
return m.pattern.MatchString(s)
}
// New creates a new Matcher based on the given pattern.
func (t Type) New(pattern string) (Matcher, error) {
switch t {
case Full:
return FullMatcher(pattern), nil
case Substr:
return SubstrMatcher(pattern), nil
case Domain:
pattern, err := ToDomain(pattern)
if err != nil {
return nil, err
}
return DomainMatcher(pattern), nil
case Regex: // 1. regex matching is case-sensitive
regex, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return &RegexMatcher{pattern: regex}, nil
default:
return nil, errors.New("unknown matcher type")
}
}
// NewDomainPattern creates a new Matcher based on the given domain pattern.
// It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
switch t {
case Full:
pattern, err := ToDomain(pattern)
if err != nil {
return nil, err
}
return FullMatcher(pattern), nil
case Substr:
pattern, err := ToDomain(pattern)
if err != nil {
return nil, err
}
return SubstrMatcher(pattern), nil
case Domain:
pattern, err := ToDomain(pattern)
if err != nil {
return nil, err
}
return DomainMatcher(pattern), nil
case Regex: // Regex's charset not in LDH subset
regex, err := regexp.Compile(pattern)
if err != nil {
return nil, err
}
return &RegexMatcher{pattern: regex}, nil
default:
return nil, errors.New("unknown matcher type")
}
}
// ToDomain converts input pattern to a domain string, and return error if such a conversion cannot be made.
// 1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
// * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers)
// * Digits 0 to 9
// * Hyphens(-) and Periods(.)
// 2. If any non-ASCII characters, domain are converted from Internationalized domain name to Punycode.
func ToDomain(pattern string) (string, error) {
for {
isASCII, hasUpper := true, false
for i := 0; i < len(pattern); i++ {
c := pattern[i]
if c >= utf8.RuneSelf {
isASCII = false
break
}
switch {
case 'A' <= c && c <= 'Z':
hasUpper = true
case 'a' <= c && c <= 'z':
case '0' <= c && c <= '9':
case c == '-':
case c == '.':
default:
return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
}
}
if !isASCII {
var err error
pattern, err = idna.Punycode.ToASCII(pattern)
if err != nil {
return "", err
}
continue
}
if hasUpper {
pattern = strings.ToLower(pattern)
}
break
}
return pattern, nil
}
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
type MatcherGroupForAll interface {
AddMatcher(matcher Matcher, value uint32)
}
// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
type MatcherGroupForFull interface {
AddFullMatcher(matcher FullMatcher, value uint32)
}
// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
type MatcherGroupForDomain interface {
AddDomainMatcher(matcher DomainMatcher, value uint32)
}
// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
type MatcherGroupForSubstr interface {
AddSubstrMatcher(matcher SubstrMatcher, value uint32)
}
// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
type MatcherGroupForRegex interface {
AddRegexMatcher(matcher *RegexMatcher, value uint32)
}
// AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
// This function is provided to help writing code to test a MatcherGroup.
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
if g, ok := g.(IndexMatcher); ok {
g.Add(matcher)
return nil
}
if g, ok := g.(MatcherGroupForAll); ok {
g.AddMatcher(matcher, value)
return nil
}
switch matcher := matcher.(type) {
case FullMatcher:
if g, ok := g.(MatcherGroupForFull); ok {
g.AddFullMatcher(matcher, value)
return nil
}
case DomainMatcher:
if g, ok := g.(MatcherGroupForDomain); ok {
g.AddDomainMatcher(matcher, value)
return nil
}
case SubstrMatcher:
if g, ok := g.(MatcherGroupForSubstr); ok {
g.AddSubstrMatcher(matcher, value)
return nil
}
case *RegexMatcher:
if g, ok := g.(MatcherGroupForRegex); ok {
g.AddRegexMatcher(matcher, value)
return nil
}
}
return errors.New("cannot add matcher to matcher group")
}
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
// It is designed to avoid new memory allocation as possible.
func CompositeMatches(matches [][]uint32) []uint32 {
switch len(matches) {
case 0:
return nil
case 1:
return matches[0]
default:
result := make([]uint32, 0, 5)
for i := 0; i < len(matches); i++ {
result = append(result, matches[i]...)
}
return result
}
}
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
// It is designed that:
// 1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher.
// 2. Indices in the same matcher keeps their original order.
// 3. Avoid new memory allocation as possible.
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
switch len(matches) {
case 0:
return nil
case 1:
return matches[0]
default:
result := make([]uint32, 0, 5)
for i := len(matches) - 1; i >= 0; i-- {
result = append(result, matches[i]...)
}
return result
}
}