On Fri, 1 Apr 2011 17:12:20 -0700 Ian Foster <mrlanrat@xxxxxxxxx> wrote: > I've created a simple python parser for Tor that will generate a csv > file from Tor's cached-descriptors and cached-consensus files. > It does not get all the data it should but it is only a first revision. > The purpose of this was to more familiarize myself with Tor for the > Google Summer of Code, Hope it is useful! > Get it here: https://github.com/mrlanrat/TorExport > > grarpamp, I hope this helps you a little, If I can I will expand the > data that this script will collect. > > Moritz Bartl, your script was very useful, I used it as a base, but > modified it a bit so that it would run on python 3. No shit. Your substantive contribution to TorExport consists of less than 10 new lines near the end -- diff attached. (I normalized the leading whitespace in both files with "expand -t 4" first.) Robert Ransom
--- moba-tormap.py 2011-04-01 20:00:56.000000000 -0700 +++ mrlanrat-torexport.py 2011-04-01 20:01:15.000000000 -0700 @@ -1,41 +1,46 @@ #!/usr/bin/env python # encoding: utf-8 - -''' - quick and dirty hack Moritz Bartl moritz@xxxxxxxxxxxxxx - 13.12.2010 - - let me know and send me your changes if you improve anything - - requires: - - pygeoip, http://code.google.com/p/pygeoip/ - - geoIP city database, eg. http://www.maxmind.com/app/geolitecity - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License (LGPL) - as published by the Free Software Foundation, either version 3 of the - License, or any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - http://www.gnu.org/licenses/ ''' +Script to parse torfiles for nodes and export csv +4/1/2010 +Ian Foster -FAST = 1000000 +requires python 3 + +Built using code from: +https://github.com/moba/tormap/blob/master/tormap.py + +TorExport is free software: you can redistribute it and/or modify it under the terms +of the GNU General Public License as published by the Free Software Foundation, +either version 3 of the License, or (at your option) any later version. + +TorExport is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU General Public License for more details. + +http://www.gnu.org/licenses/. 
+''' -import base64, shelve, pygeoip, cgi, re -from operator import attrgetter, itemgetter -from string import Template +import base64 +import cgi +import csv +import sys + +try: + sys.argv[1] +except IndexError: + print('Creates a CSV of all online nodes') + print('Please Pass folder containing cached-descriptors and cached-consensus files') + print('usage: torexport.py /path/to/tor/data') + exit() +FAST = 1000000 cachedRelays = dict() currentRouter = dict() # parse cached-descriptors to extract uptime and announced bandwidth -with open('cached-descriptors') as f: - for line in f: +with open(sys.argv[1]+ '/cached-descriptors') as f: + for line in f: line = line.strip() if line.startswith('router '): [nil,name,ip,orport,socksport,dirport] = line.split() @@ -48,13 +53,13 @@ currentRouter['version']=line[9:] if line.startswith('opt fingerprint'): fingerprint=line[16:] - currentRouter['fingerprint'] = fingerprint.replace(' ','').lower() + currentRouter['fingerprint'] = str(fingerprint.replace(' ','').lower()) if line.startswith('uptime '): currentRouter['uptime']=line[7:] if line.startswith('bandwidth '): currentRouter['bandwidth'] = line[10:] try: - currentRouter['bw-observed'] = int(line.split()[3]) + currentRouter['bw-observed'] = int(line.split()[3]) except: pass bandwidth = line[10:] @@ -65,8 +70,6 @@ cachedRelays[fingerprint] = currentRouter currentRouter = dict() -# parse cached-consensus for flags and correlate to descriptors - badRelays = dict() # Bad in flags, eg. 
BadExit, BadDirectory exitFastRelays = dict() # Exit flag, >= FAST exitRelays = dict() # Exit flag, slower than FAST @@ -74,57 +77,56 @@ stableRelays = dict() # Stable flag, but not Exit otherRelays = dict() # non Stable, non Exit -count = 0 -with open('cached-consensus') as f: - for line in f: +# parse cached-consensus for flags and correlate to descriptors +with open(sys.argv[1]+'/cached-consensus') as f: + for line in f: line = line.strip() if line.startswith('r '): [nil,name,identity,digest,date,time,ip,orport,dirport] = line.split() identity = identity.strip() - fingerprint = base64.decodestring(identity + '=\n').encode('hex') - # php: unpack('H*',decode_base64($identity)) + fingerprint = base64.decodestring((identity+ '=\n').encode('ascii')) currentRouter = dict() if fingerprint in cachedRelays: currentRouter = cachedRelays[fingerprint] # trust consensus more than cached-descriptors, replace info - currentRouter['fingerprint'] = fingerprint + currentRouter['fingerprint'] = str(fingerprint) currentRouter['name'] = name currentRouter['ip'] = ip currentRouter['orport'] = orport currentRouter['dirport'] = dirport if line.startswith('p '): currentRouter['policy'] = line[2:] - if line.startswith('s '): + if line.startswith('s '): flags = line[2:] - currentRouter['flags'] = flags - if flags.find('Bad')>-1: + currentRouter['flags'] = flags + if flags.find('Bad')>-1: badRelays[fingerprint] = currentRouter elif flags.find('Exit')>-1: - if currentRouter.has_key('bw-observed') and currentRouter['bw-observed']>FAST: + if 'bw-observed' in currentRouter and currentRouter['bw-observed']>FAST: exitFastRelays[fingerprint] = currentRouter else: exitRelays[fingerprint] = currentRouter elif flags.find('Stable')>-1: - if currentRouter.has_key('bw-observed') and currentRouter['bw-observed']>FAST: + if 'bw-observed' in currentRouter and currentRouter['bw-observed']>FAST: stableFastRelays[fingerprint] = currentRouter else: stableRelays[fingerprint] = currentRouter else: 
otherRelays[fingerprint] = currentRouter -print 'Bad:', len(badRelays) -print 'Exit:', len(exitRelays) -print 'Fast exit:', len(exitFastRelays) -print 'Non-exit stable:', len(stableRelays) -print 'Fast non-exit stable:', len(stableFastRelays) -print 'Other:', len(otherRelays) +print('Bad:', len(badRelays)) +print('Exit:', len(exitRelays)) +print('Fast exit:', len(exitFastRelays)) +print('Non-exit stable:', len(stableRelays)) +print('Fast non-exit stable:', len(stableFastRelays)) +print('Other:', len(otherRelays)) inConsensus = len(badRelays)+len(exitRelays)+len(stableRelays)+len(otherRelays) -print '[ in consensus:', inConsensus, ']' +print('[ in consensus:', inConsensus, ']') notInConsensus = len(cachedRelays)-len(badRelays)-len(exitRelays)-len(stableRelays)-len(otherRelays) -print '[ cached descriptors not in consensus:', notInConsensus, ']' +print('[ cached descriptors not in consensus:', notInConsensus, ']') -# put all relays we want to plot in one list for geoIP +# put all relays we want to plot in one list for csv allRelays = dict() allRelays.update(exitRelays) allRelays.update(exitFastRelays) @@ -132,115 +134,12 @@ allRelays.update(stableFastRelays) allRelays.update(otherRelays) -# geoIP -geoIPcache = shelve.open('geoip-cache') -geoIPdb = None - -for relay in allRelays.values(): - ip = relay['ip'] - if geoIPcache.has_key(ip): - info = geoIPcache[ip] - else: - if geoIPdb is None: - geoIPdb = pygeoip.GeoIP('GeoLiteCity.dat') - info = geoIPdb.record_by_addr(ip) - geoIPcache[ip] = info - relay['location'] = info - relay['latitude'] = info['latitude'] - relay['longitude'] = info['longitude'] - -geoIPcache.close() - -# generate KML - -placemarkTemplate = Template ('<Placemark>\n\ - <name>$name</name>\n\ - <description>\n\ - <![CDATA[\n\ - <p>IP: <a href="http://tools.whois.net/whoisbyip/$ip">$ip</a> ORPort: $orport DirPort: $dirport</p>\n\ - <p>Bandwidth: $bandwidth</p>\n\ - <p>Flags: $flags</p>\n\ - <p>Uptime: $uptime</p>\n\ - <p>Contact: $contact</p>\n\ - 
<p>Policy: $policy</p>\n\ - <p>Fingerprint: <a href="http://torstatus.blutmagie.de/router_detail.php?FP=$fingerprint">$prettyFingerprint</a></p>\n\ - <p>Version: $version</p>\n\ - ]]>\n\ - </description>\n\ - <styleUrl>$styleUrl</styleUrl>\n\ - <Point>\n\ - <coordinates>$longitude,$latitude</coordinates>\n\ - </Point>\n\ - </Placemark>\n\ - ') - -kmlBody = () - -def generateFolder(name, styleUrl, relays): - group = '<Folder>\n<name>%s</name>\n' % name - for fingerprint,relay in relays.items(): - # for displaying: pretty fingerprint in blocks of four, uppercase - relay['prettyFingerprint'] = " ".join(filter(None, re.split('(\w{4})', fingerprint.upper()))) - relay['styleUrl'] = styleUrl - placemark = placemarkTemplate.safe_substitute(relay) - group = group + placemark - group = group + "\n</Folder>" - return group - -kmlBody = generateFolder("%s Fast Exits (>= 1MB/s)" % len(exitFastRelays), "#exitFast", exitFastRelays) -kmlBody = kmlBody + generateFolder("%s Exits" % len(exitRelays), "#exit", exitRelays) -kmlBody = kmlBody + generateFolder("%s Fast stable nodes (>= 1MB/s)" % len(stableFastRelays), "#stableFast", stableFastRelays) -kmlBody = kmlBody + generateFolder("%s Stable nodes" % len(stableRelays), "#stable", stableRelays) -kmlBody = kmlBody + generateFolder("%s Other" % len(otherRelays), "#other", otherRelays) - -kml = open('tormap.kml', 'w') - -kmlHeader = ( - '<?xml version="1.0" encoding="UTF-8"?>\n' - '<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom">\n' - '<Document>\n' - ' <name>Tor relays</name>\n' - ' <Style id="exit">\n' - ' <IconStyle>\n' - ' <Icon>\n' - ' <href>http://maps.google.com/mapfiles/kml/paddle/grn-blank.png</href>\n' - ' </Icon>\n' - ' </IconStyle>\n' - ' </Style>\n' - ' <Style id="exitFast">\n' - ' <IconStyle>\n' - ' <Icon>\n' - ' <href>http://maps.google.com/mapfiles/kml/paddle/red-stars.png</href>\n' - ' </Icon>\n' 
- ' </IconStyle>\n' - ' </Style>\n' - ' <Style id="stable">\n' - ' <IconStyle>\n' - ' <Icon>\n' - ' <href>http://maps.google.com/mapfiles/kml/paddle/ylw-blank.png</href>\n' - ' </Icon>\n' - ' </IconStyle>\n' - ' </Style>\n' - ' <Style id="stableFast">\n' - ' <IconStyle>\n' - ' <Icon>\n' - ' <href>http://maps.google.com/mapfiles/kml/paddle/ylw-stars.png</href>\n' - ' </Icon>\n' - ' </IconStyle>\n' - ' </Style>\n' - ' <Style id="other">\n' - ' <IconStyle>\n' - ' <Icon>\n' - ' <href>http://maps.google.com/mapfiles/kml/paddle/wht-blank.png</href>\n' - ' </Icon>\n' - ' </IconStyle>\n' - ' </Style>\n' - ) - -kmlFooter = ('</Document>\n' - '</kml>\n') - -kml.write(kmlHeader) -kml.write(kmlBody) -kml.write(kmlFooter) -kml.close() \ No newline at end of file + +f = open("nodes.csv", "w",newline='\n') +csvf = csv.writer(f,dialect='excel') +csvf.writerow(list(allRelays.popitem()[1])) #write the first row +for item in allRelays.values(): + csvf.writerow(list(item.values())) + +f.close() +exit() \ No newline at end of file
Attachment:
signature.asc
Description: PGP signature
_______________________________________________ tor-talk mailing list tor-talk@xxxxxxxxxxxxxxxxxxxx https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-talk