[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
Re: [tor-dev] Tor and IP2Location LITE
On Sun, Aug 20, 2017 at 10:02:20PM +0200, Karsten Loesing wrote:
> Okay. Maybe we could do something with archive.org in that case. It's
> not that we do have a complete history for MaxMind's files, except that
> we could probably create our own history from Tor's Git repository which
> contains files based on MaxMind's files.
I have a script that walks through the history of tor's git geoip files.
#!/usr/bin/env python
import datetime
import getopt
import os.path
import socket
import subprocess
import sys
# Counts the size of per-country geoip allocations in the tor source code.
#
# Usage: ./scrape-geoip.py ~/src/tor > tor-geoip.csv
#
# ~/src/tor (or whatever the path is) must be a tor source repo; i.e. a clone of
# https://git.torproject.org/tor.git.
def usage(f=sys.stdout):
    """Write the command-line usage summary to the file object *f*.

    The program name shown is ``sys.argv[0]``.  The summary line is followed
    by one empty line.  ``f.write`` is used rather than the ``print >> f``
    statement so the function works on both Python 2 and Python 3.
    """
    text = """\
Usage: %s /path/to/tor
""" % sys.argv[0]
    f.write(text)
    # Trailing empty line, matching what a print statement would append.
    f.write("\n")
def history(dirname, filename):
    """Yield one "<commit hash> <YYYY-MM-DD>" line per commit touching *filename*.

    Runs ``git log --reverse --date=short --pretty="%H %ad" -- filename``
    with *dirname* as the working directory; because of ``--reverse`` the
    lines come out in the opposite of git's default order.

    The result is a generator of text lines (each still carrying its
    trailing newline).  When the generator is exhausted or discarded, the
    pipe is closed and the git child process is waited on, so no pipe or
    unreaped child is left behind.  git's exit status is not checked: if
    git fails, its message goes to stderr and no lines are yielded.
    """
    proc = subprocess.Popen(
        ["git", "log", "--reverse", "--date=short", "--pretty=%H %ad",
         # "--" makes git treat filename as a path, never as a revision.
         "--", filename],
        cwd=dirname, stdout=subprocess.PIPE,
        # Text mode: lines are str on Python 3 as well as Python 2.
        universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        proc.wait()
def git_show(dirname, filename, commithash):
    """Yield the lines of *filename* as it existed at commit *commithash*.

    Runs ``git show <commithash>:<filename>`` with *dirname* as the working
    directory and yields its standard output line by line as text (each
    line still carrying its trailing newline).

    When the generator is exhausted or discarded, the pipe is closed and the
    git child process is waited on, so repeated calls do not accumulate open
    pipes or unreaped children.  git's exit status is not checked: if git
    fails, its message goes to stderr and no lines are yielded.
    """
    proc = subprocess.Popen(
        ["git", "show", commithash + ":" + filename],
        cwd=dirname, stdout=subprocess.PIPE,
        # Text mode: lines are str on Python 3 as well as Python 2.
        universal_newlines=True)
    try:
        for line in proc.stdout:
            yield line
    finally:
        proc.stdout.close()
        proc.wait()
def parse_geoip(f):
    """Total the number of addresses assigned to each country code.

    *f* is an iterable of text lines.  Each data line has the form
    ``start,end,cc`` where ``start`` and ``end`` are decimal integers giving
    an inclusive address range and ``cc`` is a country code.  Lines whose
    first non-blank character is ``#`` are comments; comment lines and blank
    lines are skipped.

    Returns a dict mapping the lowercased country code to the sum of
    ``end - start + 1`` over all of its ranges.

    Raises ValueError if a range bound is not an integer, and IndexError if
    a data line has fewer than three comma-separated fields.
    """
    ccs = {}
    for line in f:
        line = line.strip()
        # Skip blank lines too: int("") would otherwise reject the whole file.
        if not line or line.startswith("#"):
            continue
        parts = line.split(",")
        start = int(parts[0])
        end = int(parts[1])
        cc = parts[2].lower()
        # Both bounds are inclusive, hence the +1.
        ccs[cc] = ccs.get(cc, 0) + end - start + 1
    return ccs
def ipv6_to_int(ipstr):
    """Convert the textual IPv6 address *ipstr* to its 128-bit integer value.

    The address is packed with ``socket.inet_pton`` and the packed bytes are
    read as one big-endian hexadecimal number.  ``binascii.hexlify`` and
    ``int`` are used instead of ``str.encode("hex")`` and ``long`` so the
    function works on both Python 2 and Python 3.

    Raises whatever ``socket.inet_pton`` raises for an invalid address.
    """
    # Local import keeps this function self-contained.
    import binascii

    packed = socket.inet_pton(socket.AF_INET6, ipstr)
    return int(binascii.hexlify(packed), 16)
def parse_geoip6(f):
    """Total the number of IPv6 addresses assigned to each country code.

    *f* is an iterable of text lines.  Each data line has the form
    ``start,end,cc`` where ``start`` and ``end`` are textual IPv6 addresses
    (converted with ``ipv6_to_int``) giving an inclusive range and ``cc`` is
    a country code.  Lines whose first non-blank character is ``#`` are
    comments; comment lines and blank lines are skipped.

    Returns a dict mapping the lowercased country code to the sum of
    ``end - start + 1`` over all of its ranges.

    Propagates any error raised by ``ipv6_to_int`` for an unparsable
    address, and raises IndexError if a data line has fewer than three
    comma-separated fields.
    """
    ccs = {}
    for line in f:
        line = line.strip()
        # Skip blank lines too: an empty address would otherwise reject the
        # whole file.
        if not line or line.startswith("#"):
            continue
        parts = line.split(",")
        start = ipv6_to_int(parts[0])
        end = ipv6_to_int(parts[1])
        cc = parts[2].lower()
        # Both bounds are inclusive, hence the +1.
        ccs[cc] = ccs.get(cc, 0) + end - start + 1
    return ccs
# Command-line handling: -h/--help prints usage; exactly one positional
# argument (the path to the tor repository) is required.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "h", ["help"])
except getopt.GetoptError as err:
    # Unknown option: report it and show usage instead of a traceback.
    sys.stderr.write("%s\n" % err)
    usage(sys.stderr)
    sys.exit(1)
for o, _ in opts:
    if o in ("-h", "--help"):
        usage()
        sys.exit()

try:
    # One-element tuple unpacking: raises ValueError unless len(args) == 1.
    TOR_PATH, = args
except ValueError:
    usage(sys.stderr)
    sys.exit(1)

# CSV header, then one row per (commit date, IP version, country).
sys.stdout.write("date,ipv,country,count\n")

# (path inside the repo, value for the "ipv" column, parser for that file)
for geoip_path, ipv, parse in (
        ("src/config/geoip", "4", parse_geoip),
        ("src/config/geoip6", "6", parse_geoip6)):
    for line in history(TOR_PATH, geoip_path):
        parts = line.strip().split()
        commithash = parts[0]
        # Round-trip through datetime so a malformed date fails loudly.
        date = datetime.datetime.strptime(parts[1], "%Y-%m-%d")
        try:
            ccs = parse(git_show(TOR_PATH, geoip_path, commithash))
        except Exception as e:
            # Best effort: a revision that cannot be parsed is reported on
            # stderr and skipped rather than aborting the whole run.
            sys.stderr.write("Skipping %s %s: %s\n" % (geoip_path, commithash, e))
            continue
        for cc, count in sorted(ccs.items()):
            sys.stdout.write(",".join([date.strftime("%Y-%m-%d"), ipv, cc, str(count)]) + "\n")
_______________________________________________
tor-dev mailing list
tor-dev@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-dev