diff --git a/tools/generate-ip-mappings.py b/tools/generate-ip-mappings.py index 3a20e3c78..5f3fd86d9 100755 --- a/tools/generate-ip-mappings.py +++ b/tools/generate-ip-mappings.py @@ -1,20 +1,16 @@ #!/usr/bin/env python3 -# Download from https://dev.maxmind.com/geoip/geoip2/geolite2/ -# You need GeoLite2-City-Blocks-IPv4.csv and GeoLite2-City-Locations-en.csv from GeoLite2 City -# license of the DB is CC-BY-SA 4.0 -# -# This product includes GeoLite2 data created by MaxMind, available from -# http://www.maxmind.com - -# usage: generate-ip-mappings.py > ip.csv +# usage: generate-ip-mappings.py +# 2 files ipv4.csv and ipv6.csv will be generated # in sqlite3 terminal: # # .mode csv -# .import `full path to ip.csv` ip_mapping +# .import `full path to ipv4.csv` ip_mapping +# .import `full path to ipv6.csv` ipv6_mapping # # For query by ip: # SELECT * FROM ip_mapping WHERE `ip_start` <= ip-in-decimal AND `ip_end` >= ip-in-decimal ORDER BY `ip_start` DESC LIMIT 1; +# SELECT * FROM ipv6_mapping WHERE `ip_start` <= upperIPv6("ipv6_addr") AND `ip_end` >= upperIPv6("ipv6_addr") ORDER BY `ip_start` DESC LIMIT 1; import socket import struct import csv @@ -23,41 +19,56 @@ import sys # import zipfile # import urllib.request -CSV_WEB_LINK = 'http://geolite.maxmind.com/download/geoip/database/GeoLite2-City-CSV.zip' -CSV_FILE = 'GeoLite2-City-Blocks-IPv4.csv' -CSV_LOCATION = 'GeoLite2-City-Locations-en.csv' +def ip2int(addr): + return struct.unpack("!I", socket.inet_aton(addr))[0] -if not os.path.exists(CSV_LOCATION): - print("File = {} does not exist. Download it from = {} ".format(CSV_LOCATION, CSV_WEB_LINK)) - sys.exit(1) +# Keep only the upper 64bit, as we only need that for geolocation +def ipv62int64(addr): + hi, lo = struct.unpack('!QQ', socket.inet_pton(socket.AF_INET6, addr)) + return hi -COUNTRY_DICT = {} -with open(CSV_LOCATION, 'r') as csvfile: - locationlist = csv.reader(csvfile, delimiter=',', quotechar='"') - # Skip header - next(locationlist) - for row in locationlist: - COUNTRY_DICT[row[0]] = row[4] +CSV_WEB_LINK = 'https://download.db-ip.com/free/dbip-city-lite-2020-01.csv.gz' +CSV_FILE = 'dbip-city-lite-2020-01.csv' if not os.path.exists(CSV_FILE): print("File = {} does not exist. Download it from = {} ".format(CSV_FILE, CSV_WEB_LINK)) sys.exit(1) -with open(CSV_FILE, 'r') as csvfile: +# Format: 1.0.0.0,1.0.0.255,OC,AU,Queensland,"South Brisbane",-27.4748,153.017 +with open(CSV_FILE, 'r') as csvfile, open('ipv4.csv', 'w') as ipv4, open('ipv6.csv', 'w') as ipv6: iplist = csv.reader(csvfile, delimiter=',', quotechar='"') - # Skip header - next(iplist) for row in iplist: - if row[7] is "" or row[8] is "": + # Skip reserved range + if row[3] == "ZZ": + continue + # Skip empty latitude and longitude + if row[6] is "" or row[7] is "": continue - ip, net_bits = row[0].split('/') + if row[0].find(':') == -1: + ipv4_line = True + else: + ipv4_line = False - # Convert submask ip to range - ip_start = (struct.unpack("!I", socket.inet_aton(ip)))[0] - ip_end = ip_start + ((1 << (32 - int(net_bits))) - 1) + if ipv4_line: + ip_start = ip2int(row[0]) + ip_end = ip2int(row[1]) + else: + ip_start = ipv62int64(row[0]) + ip_end = ipv62int64(row[1]) - latitude = float(row[7]) - longitude = float(row[8]) - country = COUNTRY_DICT.get(row[1], "") - print('%d,%d,%f,%f,%s' % (ip_start, ip_end, latitude, longitude, country)) + # Some IPv6 entries are duplicated after removing the lower 64bit + if ip_start == ip_end: + continue + + # Sqlite doesn't support unsigned int 64 + _int64_max = pow(2, 63) - 1 + if ip_start > _int64_max or ip_end > _int64_max: + continue + latitude = float(row[6]) + longitude = float(row[7]) + country = row[3] + if ipv4_line: + print('%d,%d,%f,%f,%s' % (ip_start, ip_end, latitude, longitude, country), file = ipv4) + else: + print('%d,%d,%f,%f,%s' % (ip_start, ip_end, latitude, longitude, country), file = ipv6)