1
0
mirror of https://github.com/v2fly/v2ray-core.git synced 2024-12-30 05:56:54 -05:00

Refactor: memory-efficient geo file decoder (#934)

This commit is contained in:
Loyalsoldier 2021-05-01 07:59:29 +08:00 committed by GitHub
parent 408b5adc1c
commit 7e876709fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 445 additions and 146 deletions

View File

@ -5,13 +5,11 @@ import (
"io/fs"
"os"
"path/filepath"
"strings"
"testing"
"google.golang.org/protobuf/proto"
"github.com/v2fly/v2ray-core/v4/app/router"
"github.com/v2fly/v2ray-core/v4/common"
"github.com/v2fly/v2ray-core/v4/common/geodata"
"github.com/v2fly/v2ray-core/v4/common/net"
"github.com/v2fly/v2ray-core/v4/common/platform"
"github.com/v2fly/v2ray-core/v4/common/platform/filesystem"
@ -171,7 +169,7 @@ func TestGeoIPReverseMatcher(t *testing.T) {
}
func TestGeoIPMatcher4CN(t *testing.T) {
ips, err := loadGeoIP("CN")
ips, err := geodata.LoadIP("geoip.dat", "CN")
common.Must(err)
matcher := &router.GeoIPMatcher{}
@ -183,7 +181,7 @@ func TestGeoIPMatcher4CN(t *testing.T) {
}
func TestGeoIPMatcher6US(t *testing.T) {
ips, err := loadGeoIP("US")
ips, err := geodata.LoadIP("geoip.dat", "US")
common.Must(err)
matcher := &router.GeoIPMatcher{}
@ -194,27 +192,8 @@ func TestGeoIPMatcher6US(t *testing.T) {
}
}
func loadGeoIP(country string) ([]*router.CIDR, error) {
geoipBytes, err := filesystem.ReadAsset("geoip.dat")
if err != nil {
return nil, err
}
var geoipList router.GeoIPList
if err := proto.Unmarshal(geoipBytes, &geoipList); err != nil {
return nil, err
}
for _, geoip := range geoipList.Entry {
if strings.EqualFold(geoip.CountryCode, country) {
return geoip.Cidr, nil
}
}
panic("country not found: " + country)
}
func BenchmarkGeoIPMatcher4CN(b *testing.B) {
ips, err := loadGeoIP("CN")
ips, err := geodata.LoadIP("geoip.dat", "CN")
common.Must(err)
matcher := &router.GeoIPMatcher{}
@ -228,7 +207,7 @@ func BenchmarkGeoIPMatcher4CN(b *testing.B) {
}
func BenchmarkGeoIPMatcher6US(b *testing.B) {
ips, err := loadGeoIP("US")
ips, err := geodata.LoadIP("geoip.dat", "US")
common.Must(err)
matcher := &router.GeoIPMatcher{}

View File

@ -6,13 +6,11 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"google.golang.org/protobuf/proto"
"github.com/v2fly/v2ray-core/v4/app/router"
"github.com/v2fly/v2ray-core/v4/common"
"github.com/v2fly/v2ray-core/v4/common/geodata"
"github.com/v2fly/v2ray-core/v4/common/net"
"github.com/v2fly/v2ray-core/v4/common/platform"
"github.com/v2fly/v2ray-core/v4/common/platform/filesystem"
@ -352,26 +350,8 @@ func TestRoutingRule(t *testing.T) {
}
}
func loadGeoSite(country string) ([]*router.Domain, error) {
geositeBytes, err := filesystem.ReadAsset("geosite.dat")
if err != nil {
return nil, err
}
var geositeList router.GeoSiteList
if err := proto.Unmarshal(geositeBytes, &geositeList); err != nil {
return nil, err
}
for _, site := range geositeList.Entry {
if strings.EqualFold(site.CountryCode, country) {
return site.Domain, nil
}
}
return nil, errors.New("country not found: " + country)
}
func TestChinaSites(t *testing.T) {
domains, err := loadGeoSite("CN")
domains, err := geodata.LoadSite("geosite.dat", "CN")
common.Must(err)
matcher, err := router.NewDomainMatcher(domains)
@ -418,7 +398,7 @@ func TestChinaSites(t *testing.T) {
}
func BenchmarkMphDomainMatcher(b *testing.B) {
domains, err := loadGeoSite("CN")
domains, err := geodata.LoadSite("geosite.dat", "CN")
common.Must(err)
matcher, err := router.NewMphMatcherGroup(domains)
@ -460,7 +440,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
}
func BenchmarkDomainMatcher(b *testing.B) {
domains, err := loadGeoSite("CN")
domains, err := geodata.LoadSite("geosite.dat", "CN")
common.Must(err)
matcher, err := router.NewDomainMatcher(domains)
@ -505,7 +485,7 @@ func BenchmarkMultiGeoIPMatcher(b *testing.B) {
var geoips []*router.GeoIP
{
ips, err := loadGeoIP("CN")
ips, err := geodata.LoadIP("geoip.dat", "CN")
common.Must(err)
geoips = append(geoips, &router.GeoIP{
CountryCode: "CN",
@ -514,7 +494,7 @@ func BenchmarkMultiGeoIPMatcher(b *testing.B) {
}
{
ips, err := loadGeoIP("JP")
ips, err := geodata.LoadIP("geoip.dat", "JP")
common.Must(err)
geoips = append(geoips, &router.GeoIP{
CountryCode: "JP",
@ -523,7 +503,7 @@ func BenchmarkMultiGeoIPMatcher(b *testing.B) {
}
{
ips, err := loadGeoIP("CA")
ips, err := geodata.LoadIP("geoip.dat", "CA")
common.Must(err)
geoips = append(geoips, &router.GeoIP{
CountryCode: "CA",
@ -532,7 +512,7 @@ func BenchmarkMultiGeoIPMatcher(b *testing.B) {
}
{
ips, err := loadGeoIP("US")
ips, err := geodata.LoadIP("geoip.dat", "US")
common.Must(err)
geoips = append(geoips, &router.GeoIP{
CountryCode: "US",

140
common/geodata/cache.go Normal file
View File

@ -0,0 +1,140 @@
package geodata
import (
"io/ioutil"
"runtime"
"strings"
"google.golang.org/protobuf/proto"
"github.com/v2fly/v2ray-core/v4/app/router"
"github.com/v2fly/v2ray-core/v4/common/platform"
)
// GeoIPCache maps a cache key to a decoded *router.GeoIP entry.
// Unmarshal builds its keys as the upper-cased "<asset path>|<code>".
type GeoIPCache map[string]*router.GeoIP
// Has reports whether a non-nil GeoIP is stored under key.
func (g GeoIPCache) Has(key string) bool {
	return g.Get(key) != nil
}
// Get returns the GeoIP stored under key, or nil when there is none.
func (g GeoIPCache) Get(key string) *router.GeoIP {
	// Indexing a nil map is legal in Go and yields the zero value (nil for a
	// pointer), so a nil cache needs no separate guard.
	return g[key]
}
// Set stores value under key.
//
// Set has a value receiver, so it cannot replace a nil cache with a freshly
// allocated map on behalf of the caller. Calling Set on a nil GeoIPCache is
// therefore a no-op; create the cache with make before storing into it.
func (g GeoIPCache) Set(key string, value *router.GeoIP) {
	if g == nil {
		// Allocating a map here would only change the local copy of the
		// receiver and be thrown away on return, so there is nothing to do.
		return
	}
	g[key] = value
}
// Unmarshal returns the GeoIP entry whose country code equals code
// (case-insensitively) from the geoip file named filename.
//
// filename is resolved with platform.GetAssetLocation. The cache is consulted
// first, keyed by the upper-cased "<resolved filename>|<code>". On a miss the
// entry is extracted with Decode and unmarshalled on its own. If Decode fails
// with one of the package's decoding sentinel errors, a warning is logged and
// the whole file is read and unmarshalled as a router.GeoIPList instead.
//
// A non-nil error is returned when the file cannot be read or parsed, or when
// no entry matches code; the returned *router.GeoIP is never nil on success.
func (g GeoIPCache) Unmarshal(filename, code string) (*router.GeoIP, error) {
	filename = platform.GetAssetLocation(filename)
	idx := strings.ToUpper(filename + "|" + code)
	if cached := g.Get(idx); cached != nil {
		return cached, nil
	}

	geoipBytes, err := Decode(filename, code)
	switch err {
	case nil:
		var geoip router.GeoIP
		if err := proto.Unmarshal(geoipBytes, &geoip); err != nil {
			return nil, err
		}
		g.Set(idx, &geoip)
		return &geoip, nil

	case errFailedToReadBytes, errFailedToReadExpectedLenBytes,
		errInvalidGeodataFile, errInvalidGeodataVarintLength:
		newError("failed to decode geodata file: ", filename, ". Fallback to the original ReadFile method.").AtWarning().WriteToLog()
		geoipBytes, err = ioutil.ReadFile(filename)
		if err != nil {
			return nil, err
		}
		var geoipList router.GeoIPList
		if err := proto.Unmarshal(geoipBytes, &geoipList); err != nil {
			return nil, err
		}
		// The raw file bytes are no longer needed once the list is decoded.
		runtime.GC()
		// No garbage is produced while scanning the entries, so the loop does
		// not force further collections.
		for _, geoip := range geoipList.GetEntry() {
			if strings.EqualFold(code, geoip.GetCountryCode()) {
				g.Set(idx, geoip)
				return geoip, nil
			}
		}

	default:
		return nil, err
	}

	// Reaching this point means the fallback scanned every entry without a
	// match. Report it as an error: returning (nil, nil) would make callers
	// that read fields of the result dereference a nil pointer.
	return nil, newError("country not found in ", filename, ": ", code)
}
// GeoSiteCache maps a cache key to a decoded *router.GeoSite entry.
// Unmarshal builds its keys as the upper-cased "<asset path>|<code>".
type GeoSiteCache map[string]*router.GeoSite
// Has reports whether a non-nil GeoSite is stored under key.
func (g GeoSiteCache) Has(key string) bool {
	return g.Get(key) != nil
}
// Get returns the GeoSite stored under key, or nil when there is none.
func (g GeoSiteCache) Get(key string) *router.GeoSite {
	// Indexing a nil map is legal in Go and yields the zero value (nil for a
	// pointer), so a nil cache needs no separate guard.
	return g[key]
}
// Set stores value under key.
//
// Set has a value receiver, so it cannot replace a nil cache with a freshly
// allocated map on behalf of the caller. Calling Set on a nil GeoSiteCache is
// therefore a no-op; create the cache with make before storing into it.
func (g GeoSiteCache) Set(key string, value *router.GeoSite) {
	if g == nil {
		// Allocating a map here would only change the local copy of the
		// receiver and be thrown away on return, so there is nothing to do.
		return
	}
	g[key] = value
}
// Unmarshal returns the GeoSite entry whose code equals code
// (case-insensitively) from the geosite file named filename.
//
// filename is resolved with platform.GetAssetLocation. The cache is consulted
// first, keyed by the upper-cased "<resolved filename>|<code>". On a miss the
// entry is extracted with Decode and unmarshalled on its own. If Decode fails
// with one of the package's decoding sentinel errors, a warning is logged and
// the whole file is read and unmarshalled as a router.GeoSiteList instead.
//
// A non-nil error is returned when the file cannot be read or parsed, or when
// no entry matches code; the returned *router.GeoSite is never nil on success.
func (g GeoSiteCache) Unmarshal(filename, code string) (*router.GeoSite, error) {
	filename = platform.GetAssetLocation(filename)
	idx := strings.ToUpper(filename + "|" + code)
	if cached := g.Get(idx); cached != nil {
		return cached, nil
	}

	geositeBytes, err := Decode(filename, code)
	switch err {
	case nil:
		var geosite router.GeoSite
		if err := proto.Unmarshal(geositeBytes, &geosite); err != nil {
			return nil, err
		}
		g.Set(idx, &geosite)
		return &geosite, nil

	case errFailedToReadBytes, errFailedToReadExpectedLenBytes,
		errInvalidGeodataFile, errInvalidGeodataVarintLength:
		newError("failed to decode geodata file: ", filename, ". Fallback to the original ReadFile method.").AtWarning().WriteToLog()
		geositeBytes, err = ioutil.ReadFile(filename)
		if err != nil {
			return nil, err
		}
		var geositeList router.GeoSiteList
		if err := proto.Unmarshal(geositeBytes, &geositeList); err != nil {
			return nil, err
		}
		// The raw file bytes are no longer needed once the list is decoded.
		runtime.GC()
		// No garbage is produced while scanning the entries, so the loop does
		// not force further collections.
		for _, geosite := range geositeList.GetEntry() {
			if strings.EqualFold(code, geosite.GetCountryCode()) {
				g.Set(idx, geosite)
				return geosite, nil
			}
		}

	default:
		return nil, err
	}

	// Reaching this point means the fallback scanned every entry without a
	// match. Report it as an error: returning (nil, nil) would make callers
	// that read fields of the result dereference a nil pointer.
	return nil, newError("list not found in ", filename, ": ", code)
}

116
common/geodata/decode.go Normal file
View File

@ -0,0 +1,116 @@
// Package geodata includes utilities to decode and parse the geoip & geosite dat files for V2Ray.
//
// It relies on the proto structure of GeoIP, GeoIPList, GeoSite and GeoSiteList in
// github.com/v2fly/v2ray-core/v4/app/router/config.proto to comply with following rules:
//
// 1. GeoIPList and GeoSiteList cannot be changed
// 2. The country_code in GeoIP and GeoSite must be
// a length-delimited `string` (wire type 2) with its field_number set to 1
//
package geodata
import (
"os"
"runtime"
"strings"
"google.golang.org/protobuf/encoding/protowire"
"github.com/v2fly/v2ray-core/v4/common/errors"
)
//go:generate go run github.com/v2fly/v2ray-core/v4/common/errors/errorgen
// Sentinel errors returned by emitBytes. GeoIPCache.Unmarshal and
// GeoSiteCache.Unmarshal compare against these four values to decide whether
// to fall back to reading and unmarshalling the whole file.
var (
	// errFailedToReadBytes is returned when reading from the file reports an error.
	errFailedToReadBytes = errors.New("failed to read bytes")
	// errFailedToReadExpectedLenBytes is returned when a read yields fewer bytes than requested.
	errFailedToReadExpectedLenBytes = errors.New("failed to read expected length of bytes")
	// errInvalidGeodataFile is returned when a byte that should be the tag 0x0A is something else.
	errInvalidGeodataFile = errors.New("invalid geodata file")
	// errInvalidGeodataVarintLength is returned when a length prefix cannot be parsed as a varint.
	errInvalidGeodataVarintLength = errors.New("invalid geodata varint length")
)
// emitBytes scans the geoip/geosite dat file f from its current position and
// returns the raw protobuf bytes of the first entry whose country code equals
// code (case-insensitively). Only one entry is held in memory at a time.
//
// Each record is expected to look like
//
//	0x0A <entry length varint> 0x0A <code length varint> <code bytes> <rest of entry>
//
// where the entry length covers everything from the second 0x0A onwards. The
// scanner is a small state machine driven by count:
//
//	1: outer tag    2: entry length    3: inner tag    4: code length
//	5: code bytes   6: the whole matched entry
//
// It returns errFailedToReadBytes when a read or seek fails (including
// reaching the end of the file without a match),
// errFailedToReadExpectedLenBytes on a short read, errInvalidGeodataFile when
// the layout is not as expected, and errInvalidGeodataVarintLength when a
// length prefix is not a valid varint.
func emitBytes(f *os.File, code string) ([]byte, error) {
	const (
		lenDelimitedFieldOne = 10 // tag byte 0x0A: (field_number 1 << 3) | wire type 2
		seekCurrent          = 1  // whence for Seek: relative to the current offset (io.SeekCurrent)
		maxVarintLen         = 10 // a 64-bit varint never occupies more than 10 bytes
	)

	count := 1
	isInner := false
	tempContainer := make([]byte, 0, 5)

	var result []byte
	var advancedN uint64 = 1
	var geoDataVarintLength, codeVarintLength, varintLenByteLen uint64

Loop:
	for {
		container := make([]byte, advancedN)
		bytesRead, err := f.Read(container)
		if err != nil {
			return nil, errFailedToReadBytes
		}
		if bytesRead != len(container) {
			return nil, errFailedToReadExpectedLenBytes
		}

		switch count {
		case 1, 3: // data type ((field_number << 3) | wire_type)
			if container[0] != lenDelimitedFieldOne {
				return nil, errInvalidGeodataFile
			}
			advancedN = 1
			count++

		case 2, 4: // data length
			tempContainer = append(tempContainer, container...)
			if container[0] > 127 { // continuation bit set: the varint has more bytes
				if len(tempContainer) >= maxVarintLen {
					// Still unterminated after the maximum varint size:
					// stop instead of accumulating bytes without bound.
					return nil, errInvalidGeodataVarintLength
				}
				advancedN = 1
				continue
			}
			lenVarint, n := protowire.ConsumeVarint(tempContainer)
			if n < 0 {
				return nil, errInvalidGeodataVarintLength
			}
			tempContainer = tempContainer[:0]
			if !isInner {
				isInner = true
				geoDataVarintLength = lenVarint
				advancedN = 1
			} else {
				isInner = false
				codeVarintLength = lenVarint
				varintLenByteLen = uint64(n)
				advancedN = codeVarintLength
			}
			count++

		case 5: // data value
			// Bytes consumed from the entry so far: inner tag, code length
			// varint and the code itself.
			headerLen := 1 + int64(varintLenByteLen) + int64(codeVarintLength)
			entryLen := int64(geoDataVarintLength)
			if entryLen < headerLen {
				// The entry claims to be shorter than its own header (or its
				// length overflows int64). Seeking by such a value would move
				// backwards and could loop forever.
				return nil, errInvalidGeodataFile
			}
			if strings.EqualFold(string(container), code) {
				count++
				// Back to the start of the GeoIP or GeoSite entry.
				if _, err := f.Seek(-headerLen, seekCurrent); err != nil {
					return nil, errFailedToReadBytes
				}
				advancedN = geoDataVarintLength // the number of bytes to be read in next round
			} else {
				count = 1
				// Skip the remainder of the unmatched GeoIP or GeoSite entry.
				if _, err := f.Seek(entryLen-headerLen, seekCurrent); err != nil {
					return nil, errFailedToReadBytes
				}
				advancedN = 1 // the next round starts another record
			}

		case 6: // matched GeoIP or GeoSite entry
			result = container
			break Loop
		}

		runtime.GC() // run GC every round to save memory
	}

	runtime.GC() // run GC at the end to save memory
	return result, nil
}
// Decode opens the geodata file at filename and returns the raw protobuf
// bytes of the entry identified by code, as produced by emitBytes.
// The file is closed before Decode returns.
func Decode(filename, code string) ([]byte, error) {
	file, openErr := os.Open(filename)
	if openErr != nil {
		return nil, newError("failed to open file: ", filename).Base(openErr)
	}
	defer file.Close()

	entry, scanErr := emitBytes(file, code)
	if scanErr == nil {
		return entry, nil
	}
	return nil, scanErr
}

View File

@ -0,0 +1,76 @@
package geodata_test
import (
"errors"
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/v2fly/v2ray-core/v4/common"
"github.com/v2fly/v2ray-core/v4/common/geodata"
"github.com/v2fly/v2ray-core/v4/common/platform"
"github.com/v2fly/v2ray-core/v4/common/platform/filesystem"
)
const (
geoipURL = "https://raw.githubusercontent.com/v2fly/geoip/release/geoip.dat"
geositeURL = "https://raw.githubusercontent.com/v2fly/domain-list-community/release/dlc.dat"
)
// init points the asset location at <wd>/../../testing/temp and makes sure
// geoip.dat and geosite.dat are available there, downloading each one only
// when it is missing both from the asset location and from that directory.
func init() {
	wd, err := os.Getwd()
	common.Must(err)

	tempPath := filepath.Join(wd, "..", "..", "testing", "temp")
	os.Setenv("v2ray.location.asset", tempPath)
	common.Must(os.MkdirAll(tempPath, 0755))

	// missing reports whether stat-ing path fails with "does not exist".
	missing := func(path string) bool {
		_, statErr := os.Stat(path)
		return errors.Is(statErr, fs.ErrNotExist)
	}

	fixtures := []struct {
		name string
		url  string
	}{
		{name: "geoip.dat", url: geoipURL},
		{name: "geosite.dat", url: geositeURL},
	}

	for _, fixture := range fixtures {
		target := filepath.Join(tempPath, fixture.name)
		if !missing(platform.GetAssetLocation(fixture.name)) || !missing(target) {
			continue
		}
		content, fetchErr := common.FetchHTTPContent(fixture.url)
		common.Must(fetchErr)
		common.Must(filesystem.WriteFile(target, content))
	}
}
func TestDecodeGeoIP(t *testing.T) {
filename := platform.GetAssetLocation("geoip.dat")
result, err := geodata.Decode(filename, "test")
if err != nil {
t.Error(err)
}
expected := []byte{10, 4, 84, 69, 83, 84, 18, 8, 10, 4, 127, 0, 0, 0, 16, 8}
if cmp.Diff(result, expected) != "" {
t.Errorf("failed to load geoip:test, expected: %v, got: %v", expected, result)
}
}
func TestDecodeGeoSite(t *testing.T) {
filename := platform.GetAssetLocation("geosite.dat")
result, err := geodata.Decode(filename, "test")
if err != nil {
t.Error(err)
}
expected := []byte{10, 4, 84, 69, 83, 84, 18, 20, 8, 3, 18, 16, 116, 101, 115, 116, 46, 101, 120, 97, 109, 112, 108, 101, 46, 99, 111, 109}
if cmp.Diff(result, expected) != "" {
t.Errorf("failed to load geosite:test, expected: %v, got: %v", expected, result)
}
}

View File

@ -0,0 +1,9 @@
package geodata
import "github.com/v2fly/v2ray-core/v4/common/errors"
// errPathObjHolder is an empty marker type; newError passes a value of it to
// WithPathObj.
// NOTE(review): presumably this lets the errors package attribute an error to
// this package — confirm against common/errors.
type errPathObjHolder struct{}

// newError builds an *errors.Error from values with errors.New and attaches an
// errPathObjHolder value to it through WithPathObj.
func newError(values ...interface{}) *errors.Error {
	return errors.New(values...).WithPathObj(errPathObjHolder{})
}

28
common/geodata/load.go Normal file
View File

@ -0,0 +1,28 @@
package geodata
import (
"runtime"
"github.com/v2fly/v2ray-core/v4/app/router"
)
// geoipcache is the package-level cache that LoadIP reads through.
var geoipcache GeoIPCache = make(map[string]*router.GeoIP)
// geositecache is the package-level cache that LoadSite reads through.
var geositecache GeoSiteCache = make(map[string]*router.GeoSite)
// LoadIP returns the CIDR list of the entry named country in the geoip file
// filename. The lookup goes through the package-level geoipcache.
//
// An error is returned when the entry cannot be decoded or does not exist.
func LoadIP(filename, country string) ([]*router.CIDR, error) {
	geoip, err := geoipcache.Unmarshal(filename, country)
	if err != nil {
		return nil, newError("failed to decode geodata file: ", filename).Base(err)
	}
	if geoip == nil {
		// Guard against a nil entry arriving with a nil error, so a missing
		// country yields an error instead of a nil-pointer dereference below.
		return nil, newError("country not found in ", filename, ": ", country)
	}
	runtime.GC()
	return geoip.Cidr, nil
}
// LoadSite returns the domain list of the entry named list in the geosite file
// filename. The lookup goes through the package-level geositecache.
//
// An error is returned when the entry cannot be decoded or does not exist.
func LoadSite(filename, list string) ([]*router.Domain, error) {
	geosite, err := geositecache.Unmarshal(filename, list)
	if err != nil {
		return nil, newError("failed to decode geodata file: ", filename).Base(err)
	}
	if geosite == nil {
		// Guard against a nil entry arriving with a nil error, so a missing
		// list yields an error instead of a nil-pointer dereference below.
		return nil, newError("list not found in ", filename, ": ", list)
	}
	runtime.GC()
	return geosite.Domain, nil
}

58
infra/conf/geodata.go Normal file
View File

@ -0,0 +1,58 @@
package conf
import (
"runtime"
"strings"
"github.com/v2fly/v2ray-core/v4/app/router"
"github.com/v2fly/v2ray-core/v4/common/geodata"
)
// loadGeoIP loads the CIDR list for country from the default "geoip.dat" file.
func loadGeoIP(country string) ([]*router.CIDR, error) {
	const defaultGeoIPFile = "geoip.dat"
	return geodata.LoadIP(defaultGeoIPFile, country)
}
// loadGeosite loads the domains for list, which may carry "@attr" suffixes,
// from the default "geosite.dat" file.
func loadGeosite(list string) ([]*router.Domain, error) {
	const defaultGeositeFile = "geosite.dat"
	return loadGeositeWithAttr(defaultGeositeFile, list)
}
// loadGeositeWithAttr loads a geosite list from filename and optionally
// filters it by attributes.
//
// siteWithAttr has the form "list@attr1@attr2...": the text before the first
// "@" (trimmed of surrounding whitespace) is the list name handed to
// geodata.LoadSite, and the remaining parts are passed to parseAttrs. When the
// parsed attribute list is empty the whole list is returned; otherwise only
// the domains accepted by the attribute list's Match are returned.
//
// An error is returned when the list name is empty or the list cannot be
// loaded. An "@" without usable attributes, or a filter that matches no
// domain, is logged as a warning and is not an error.
func loadGeositeWithAttr(filename string, siteWithAttr string) ([]*router.Domain, error) {
	// strings.Split with a non-empty separator always yields at least one
	// element, so parts[0] is safe even for an empty rule.
	parts := strings.Split(siteWithAttr, "@")
	list := strings.TrimSpace(parts[0])
	attrVal := parts[1:]

	if len(list) == 0 {
		return nil, newError("empty listname in rule: ", siteWithAttr)
	}

	domains, err := geodata.LoadSite(filename, list)
	if err != nil {
		return nil, err
	}

	attrs := parseAttrs(attrVal)
	if attrs.IsEmpty() {
		if strings.Contains(siteWithAttr, "@") {
			// Constructing the error alone has no effect; write it to the log.
			newError("empty attribute list: ", siteWithAttr).AtWarning().WriteToLog()
		}
		return domains, nil
	}

	filteredDomains := make([]*router.Domain, 0, len(domains))
	for _, domain := range domains {
		if attrs.Match(domain) {
			filteredDomains = append(filteredDomains, domain)
		}
	}
	if len(filteredDomains) == 0 {
		// Constructing the error alone has no effect; write it to the log.
		newError("attribute match no rule: geosite:", siteWithAttr).AtWarning().WriteToLog()
	}

	runtime.GC()
	return filteredDomains, nil
}

View File

@ -2,6 +2,7 @@ package conf
import (
"github.com/golang/protobuf/proto"
"github.com/v2fly/v2ray-core/v4/app/observatory"
)

View File

@ -5,11 +5,9 @@ import (
"strconv"
"strings"
"github.com/golang/protobuf/proto"
"github.com/v2fly/v2ray-core/v4/app/router"
"github.com/v2fly/v2ray-core/v4/common/geodata"
"github.com/v2fly/v2ray-core/v4/common/net"
"github.com/v2fly/v2ray-core/v4/common/platform/filesystem"
)
type RouterRulesConfig struct {
@ -174,48 +172,6 @@ func ParseIP(s string) (*router.CIDR, error) {
}
}
func loadGeoIP(country string) ([]*router.CIDR, error) {
return loadIP("geoip.dat", country)
}
func loadIP(filename, country string) ([]*router.CIDR, error) {
geoipBytes, err := filesystem.ReadAsset(filename)
if err != nil {
return nil, newError("failed to open file: ", filename).Base(err)
}
var geoipList router.GeoIPList
if err := proto.Unmarshal(geoipBytes, &geoipList); err != nil {
return nil, err
}
for _, geoip := range geoipList.Entry {
if strings.EqualFold(geoip.CountryCode, country) {
return geoip.Cidr, nil
}
}
return nil, newError("country not found in ", filename, ": ", country)
}
func loadSite(filename, list string) ([]*router.Domain, error) {
geositeBytes, err := filesystem.ReadAsset(filename)
if err != nil {
return nil, newError("failed to open file: ", filename).Base(err)
}
var geositeList router.GeoSiteList
if err := proto.Unmarshal(geositeBytes, &geositeList); err != nil {
return nil, err
}
for _, site := range geositeList.Entry {
if strings.EqualFold(site.CountryCode, list) {
return site.Domain, nil
}
}
return nil, newError("list not found in ", filename, ": ", list)
}
type AttributeMatcher interface {
Match(*router.Domain) bool
}
@ -260,50 +216,6 @@ func parseAttrs(attrs []string) *AttributeList {
return al
}
func loadGeosite(list string) ([]*router.Domain, error) {
return loadGeositeWithAttr("geosite.dat", list)
}
func loadGeositeWithAttr(file string, siteWithAttr string) ([]*router.Domain, error) {
parts := strings.Split(siteWithAttr, "@")
if len(parts) == 0 {
return nil, newError("empty rule")
}
list := strings.TrimSpace(parts[0])
attrVal := parts[1:]
if len(list) == 0 {
return nil, newError("empty listname in rule: ", siteWithAttr)
}
domains, err := loadSite(file, list)
if err != nil {
return nil, err
}
attrs := parseAttrs(attrVal)
if attrs.IsEmpty() {
if strings.Contains(siteWithAttr, "@") {
newError("empty attribute list: ", siteWithAttr)
}
return domains, nil
}
filteredDomains := make([]*router.Domain, 0, len(domains))
hasAttrMatched := false
for _, domain := range domains {
if attrs.Match(domain) {
hasAttrMatched = true
filteredDomains = append(filteredDomains, domain)
}
}
if !hasAttrMatched {
newError("attribute match no rule: geosite:", siteWithAttr)
}
return filteredDomains, nil
}
func parseDomainRule(domain string) ([]*router.Domain, error) {
if strings.HasPrefix(domain, "geosite:") {
list := domain[8:]
@ -414,7 +326,7 @@ func toCidrList(ips StringList) ([]*router.GeoIP, error) {
}
geoip, err := loadGeoIP(country)
if err != nil {
return nil, newError("failed to load geoip: ", country).Base(err)
return nil, newError("failed to load geoip:", country).Base(err)
}
geoipList = append(geoipList, &router.GeoIP{
@ -455,9 +367,9 @@ func toCidrList(ips StringList) ([]*router.GeoIP, error) {
country = country[1:]
isReverseMatch = true
}
geoip, err := loadIP(filename, country)
geoip, err := geodata.LoadIP(filename, country)
if err != nil {
return nil, newError("failed to load geoip: ", country, " from ", filename).Base(err)
return nil, newError("failed to load geoip:", country, " from ", filename).Base(err)
}
geoipList = append(geoipList, &router.GeoIP{