[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

Re: [tor-dev] Tor and IP2Location LITE



On Sun, Aug 20, 2017 at 10:02:20PM +0200, Karsten Loesing wrote:
> Okay. Maybe we could do something with archive.org in that case. It's
> not that we do have a complete history for MaxMind's files, except that
> we could probably create our own history from Tor's Git repository which
> contains files based on MaxMind's files.

I have a script that walks through the history of tor's git geoip files.
#!/usr/bin/env python

import datetime
import getopt
import os.path
import socket
import subprocess
import sys

# Counts the size of per-country geoip allocations in the tor source code.
#
# Usage: ./scrape-geoip.py ~/src/tor > tor-geoip.csv
#
# ~/src/tor (or whatever the path is) must be a tor source repo; i.e. a clone of
# https://git.torproject.org/tor.git.

def usage(f=sys.stdout):
    """Write the command-line usage message to the file object f.

    f defaults to the sys.stdout that was current when this function was
    defined. The message names the running script using sys.argv[0] and is
    followed by one blank line.
    """
    text = "Usage: %s /path/to/tor\n" % sys.argv[0]
    # f.write instead of the Python-2-only "print >> f" statement; the extra
    # "\n" reproduces the newline that print used to append after the text.
    f.write(text + "\n")

def history(dirname, filename):
    """Start "git log" for filename in dirname and return its output pipe.

    The command is run with --reverse, --date=short and the format
    "%H %ad", and dirname is used as the working directory of the child
    process. The returned object is the child's stdout pipe; the caller
    iterates over it line by line.
    """
    git_log = [
        "git", "log", "--reverse", "--date=short", "--pretty=%H %ad", filename,
    ]
    child = subprocess.Popen(git_log, stdout=subprocess.PIPE, cwd=dirname)
    return child.stdout

def git_show(dirname, filename, commithash):
    """Start "git show COMMITHASH:FILENAME" in dirname and return its output pipe.

    dirname is used as the working directory of the child process. The
    returned object is the child's stdout pipe.
    """
    # "<commit>:<path>" is the object name handed to git show.
    blob_spec = commithash + ":" + filename
    child = subprocess.Popen(
        ["git", "show", blob_spec], stdout=subprocess.PIPE, cwd=dirname
    )
    return child.stdout

def parse_geoip(f):
    """Sum the size of the address ranges assigned to each country.

    f is an iterable of lines of the form "START,END,CC", where START and END
    are the inclusive integer bounds of a range and CC is a country code.
    Lines may be str or bytes. Blank lines and lines beginning with "#" are
    ignored.

    Returns a dict mapping each lowercased country code to the total number
    of addresses (END - START + 1, summed over all of its ranges).

    Raises ValueError or IndexError if a non-comment line is malformed.
    """
    ccs = {}
    for line in f:
        # A subprocess pipe yields bytes on Python 3. On Python 2 bytes is
        # str, so this branch is never taken and lines pass through untouched.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode("utf-8")
        line = line.strip()
        # Skip blank lines as well as comments, so that a stray empty line
        # does not invalidate the whole file with a ValueError.
        if not line or line.startswith("#"):
            continue
        parts = line.split(",")
        start = int(parts[0])
        end = int(parts[1])
        cc = parts[2].lower()
        # Both bounds are inclusive, hence the + 1.
        ccs[cc] = ccs.get(cc, 0) + end - start + 1
    return ccs

def ipv6_to_int(ipstr):
    """Return the integer value of the IPv6 address written in ipstr.

    The address is packed with socket.inet_pton and the packed bytes are read
    as one big-endian unsigned integer.

    Raises socket.error if inet_pton rejects ipstr.
    """
    packed = socket.inet_pton(socket.AF_INET6, ipstr)
    value = 0
    # Iterating a bytearray yields integer byte values on both Python 2 and
    # Python 3, which avoids the Python-2-only long() and str.encode("hex").
    for byte in bytearray(packed):
        value = (value << 8) | byte
    return value

def parse_geoip6(f):
    """Sum the size of the IPv6 address ranges assigned to each country.

    f is an iterable of lines of the form "START,END,CC", where START and END
    are the inclusive bounds of a range written as IPv6 address strings and
    CC is a country code. Lines may be str or bytes. Blank lines and lines
    beginning with "#" are ignored.

    Returns a dict mapping each lowercased country code to the total number
    of addresses (END - START + 1, summed over all of its ranges).

    Raises IndexError if a non-comment line has too few fields, and whatever
    ipv6_to_int raises if a bound is not a valid IPv6 address.
    """
    ccs = {}
    for line in f:
        # A subprocess pipe yields bytes on Python 3. On Python 2 bytes is
        # str, so this branch is never taken and lines pass through untouched.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode("utf-8")
        line = line.strip()
        # Skip blank lines as well as comments, so that a stray empty line
        # does not invalidate the whole file.
        if not line or line.startswith("#"):
            continue
        parts = line.split(",")
        start = ipv6_to_int(parts[0])
        end = ipv6_to_int(parts[1])
        cc = parts[2].lower()
        # Both bounds are inclusive, hence the + 1.
        ccs[cc] = ccs.get(cc, 0) + end - start + 1
    return ccs


# Command-line handling: -h/--help prints usage and exits successfully;
# anything other than exactly one positional argument is an error.
try:
    opts, args = getopt.gnu_getopt(sys.argv[1:], "h", ["help"])
except getopt.GetoptError as e:
    # Report an unknown option with the usage text instead of a traceback.
    sys.stderr.write("%s\n" % e)
    usage(sys.stderr)
    sys.exit(1)
for o, a in opts:
    if o in ("-h", "--help"):
        usage()
        sys.exit()

try:
    TOR_PATH, = args
except ValueError:
    usage(sys.stderr)
    sys.exit(1)

# CSV header. sys.stdout.write is used instead of the print statement so the
# script parses on both Python 2 and Python 3.
sys.stdout.write("date,ipv,country,count\n")

# For each geoip file: the path inside the repository, the value written to
# the "ipv" column, and the function that parses that file's format.
for filename, ipv, parse in (
    ("src/config/geoip", "4", parse_geoip),
    ("src/config/geoip6", "6", parse_geoip6),
):
    for line in history(TOR_PATH, filename):
        # The pipe yields bytes on Python 3; on Python 2 bytes is str and the
        # line is used as-is.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode("utf-8")
        parts = line.strip().split()
        if len(parts) < 2:
            # Not a "<hash> <date>" line; nothing to look up.
            continue
        commithash = parts[0]
        date = datetime.datetime.strptime(parts[1], "%Y-%m-%d")

        try:
            ccs = parse(git_show(TOR_PATH, filename, commithash))
        except Exception as e:
            # Best effort: a revision that cannot be parsed is reported on
            # stderr and skipped, and the walk continues.
            sys.stderr.write("Skipping %s %s: %s\n" % (filename, commithash, e))
            continue
        for cc, count in sorted(ccs.items()):
            row = [date.strftime("%Y-%m-%d"), ipv, cc, str(count)]
            sys.stdout.write(",".join(row) + "\n")
_______________________________________________
tor-dev mailing list
tor-dev@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-dev