[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

Re: [tor-talk] GSOC Ideas.



On Fri, 1 Apr 2011 17:12:20 -0700
Ian Foster <mrlanrat@xxxxxxxxx> wrote:

> I've created a simple python parser for Tor that will generate a csv
> file from Tor's cached-descriptors and cached-consensus files.
> It does not get all the data it should but it is only a first revision.
> The purpose of this was to more familiarize myself with Tor for the
> Google Summer of Code, Hope it is useful!
> Get it here: https://github.com/mrlanrat/TorExport
> 
> grarpamp, I hope this helps you a little, If I can I will expand the
> data that this script will collect.
> 
> Moritz Bartl, your script was very useful, I used it as a base, but
> modified it a bit so that it would run on python 3.

No shit.  Your substantive contribution to TorExport consists of less
than 10 new lines near the end -- diff attached.  (I normalized the
leading whitespace in both files with 'expand -t 4' first.)


Robert Ransom
--- moba-tormap.py	2011-04-01 20:00:56.000000000 -0700
+++ mrlanrat-torexport.py	2011-04-01 20:01:15.000000000 -0700
@@ -1,41 +1,46 @@
 #!/usr/bin/env python
 # encoding: utf-8
-
-'''  
- quick and dirty hack Moritz Bartl moritz@xxxxxxxxxxxxxx
- 13.12.2010
-
- let me know and send me your changes if you improve anything
-
- requires: 
- - pygeoip, http://code.google.com/p/pygeoip/
- - geoIP city database, eg. http://www.maxmind.com/app/geolitecity
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License (LGPL) 
- as published by the Free Software Foundation, either version 3 of the 
- License, or any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU Lesser General Public License for more details.
- 
- http://www.gnu.org/licenses/
 '''
+Script to parse torfiles for nodes and export csv
+4/1/2010
+Ian Foster
 
-FAST = 1000000
+requires python 3
+
+Built using code from:
+https://github.com/moba/tormap/blob/master/tormap.py
+
+TorExport is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, 
+either version 3 of the License, or (at your option) any later version.
+
+TorExport is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+http://www.gnu.org/licenses/.
+'''
 
-import base64, shelve, pygeoip, cgi, re
-from operator import attrgetter, itemgetter
-from string import Template
+import base64
+import cgi
+import csv
+import sys
+
+try:
+    sys.argv[1]
+except IndexError:
+    print('Creates a CSV of all online nodes')
+    print('Please Pass folder containing cached-descriptors and cached-consensus files')
+    print('usage: torexport.py /path/to/tor/data')
+    exit()
 
+FAST = 1000000
 cachedRelays = dict()
 currentRouter = dict()
 
 # parse cached-descriptors to extract uptime and announced bandwidth
-with open('cached-descriptors') as f:
-    for line in f:              
+with open(sys.argv[1]+ '/cached-descriptors') as f:
+    for line in f:
         line = line.strip()
         if line.startswith('router '):
             [nil,name,ip,orport,socksport,dirport] = line.split()
@@ -48,13 +53,13 @@
             currentRouter['version']=line[9:]
         if line.startswith('opt fingerprint'):
             fingerprint=line[16:]
-            currentRouter['fingerprint'] = fingerprint.replace(' ','').lower()
+            currentRouter['fingerprint'] = str(fingerprint.replace(' ','').lower())
         if line.startswith('uptime '):
             currentRouter['uptime']=line[7:]
         if line.startswith('bandwidth '):
             currentRouter['bandwidth'] = line[10:]
             try:
-                currentRouter['bw-observed'] = int(line.split()[3])         
+                currentRouter['bw-observed'] = int(line.split()[3])
             except:
                 pass
             bandwidth = line[10:]
@@ -65,8 +70,6 @@
             cachedRelays[fingerprint] = currentRouter
             currentRouter = dict()
 
-# parse cached-consensus for flags and correlate to descriptors
-
 badRelays = dict() # Bad in flags, eg. BadExit, BadDirectory
 exitFastRelays = dict() # Exit flag, >= FAST
 exitRelays = dict() # Exit flag, slower than FAST
@@ -74,57 +77,56 @@
 stableRelays = dict() # Stable flag, but not Exit
 otherRelays = dict() # non Stable, non Exit
 
-count = 0
-with open('cached-consensus') as f:
-    for line in f:                          
+# parse cached-consensus for flags and correlate to descriptors
+with open(sys.argv[1]+'/cached-consensus') as f:
+    for line in f:
         line = line.strip()
         if line.startswith('r '):
             [nil,name,identity,digest,date,time,ip,orport,dirport] = line.split()
             identity = identity.strip()
-            fingerprint = base64.decodestring(identity + '=\n').encode('hex')
-            # php: unpack('H*',decode_base64($identity))
+            fingerprint = base64.decodestring((identity+ '=\n').encode('ascii'))
             currentRouter = dict()
             if fingerprint in cachedRelays:
                 currentRouter = cachedRelays[fingerprint]
             # trust consensus more than cached-descriptors, replace info
-            currentRouter['fingerprint'] = fingerprint
+            currentRouter['fingerprint'] = str(fingerprint)
             currentRouter['name'] = name
             currentRouter['ip'] = ip
             currentRouter['orport'] = orport
             currentRouter['dirport'] = dirport
         if line.startswith('p '):
             currentRouter['policy'] = line[2:]
-        if line.startswith('s '):       
+        if line.startswith('s '):
             flags = line[2:]
-            currentRouter['flags'] = flags          
-            if flags.find('Bad')>-1:    
+            currentRouter['flags'] = flags
+            if flags.find('Bad')>-1:
                 badRelays[fingerprint] = currentRouter
             elif flags.find('Exit')>-1:
-                if currentRouter.has_key('bw-observed') and currentRouter['bw-observed']>FAST:
+                if 'bw-observed' in currentRouter and currentRouter['bw-observed']>FAST:
                     exitFastRelays[fingerprint] = currentRouter
                 else:
                     exitRelays[fingerprint] = currentRouter
             elif flags.find('Stable')>-1:
-                if currentRouter.has_key('bw-observed') and currentRouter['bw-observed']>FAST:
+                if 'bw-observed' in currentRouter and currentRouter['bw-observed']>FAST:
                     stableFastRelays[fingerprint] = currentRouter
                 else:
                     stableRelays[fingerprint] = currentRouter
             else:
                 otherRelays[fingerprint] = currentRouter
 
-print 'Bad:', len(badRelays)
-print 'Exit:', len(exitRelays)
-print 'Fast exit:', len(exitFastRelays)
-print 'Non-exit stable:', len(stableRelays)
-print 'Fast non-exit stable:', len(stableFastRelays)
-print 'Other:', len(otherRelays)
+print('Bad:', len(badRelays))
+print('Exit:', len(exitRelays))
+print('Fast exit:', len(exitFastRelays))
+print('Non-exit stable:', len(stableRelays))
+print('Fast non-exit stable:', len(stableFastRelays))
+print('Other:', len(otherRelays))
 
 inConsensus = len(badRelays)+len(exitRelays)+len(stableRelays)+len(otherRelays)
-print '[ in consensus:', inConsensus, ']'
+print('[ in consensus:', inConsensus, ']')
 notInConsensus = len(cachedRelays)-len(badRelays)-len(exitRelays)-len(stableRelays)-len(otherRelays)
-print '[ cached descriptors not in consensus:', notInConsensus, ']'
+print('[ cached descriptors not in consensus:', notInConsensus, ']')
 
-# put all relays we want to plot in one list for geoIP
+# put all relays we want to plot in one list for csv
 allRelays = dict()
 allRelays.update(exitRelays)
 allRelays.update(exitFastRelays)
@@ -132,115 +134,12 @@
 allRelays.update(stableFastRelays)
 allRelays.update(otherRelays)
 
-# geoIP
-geoIPcache = shelve.open('geoip-cache')
-geoIPdb = None
-
-for relay in allRelays.values():
-    ip = relay['ip']
-    if geoIPcache.has_key(ip):
-        info = geoIPcache[ip]
-    else:
-        if geoIPdb is None:
-            geoIPdb = pygeoip.GeoIP('GeoLiteCity.dat')
-        info = geoIPdb.record_by_addr(ip)
-        geoIPcache[ip] = info
-    relay['location'] = info
-    relay['latitude'] = info['latitude']
-    relay['longitude'] = info['longitude']
-    
-geoIPcache.close()
-
-# generate KML
-
-placemarkTemplate = Template ('<Placemark>\n\
-    <name>$name</name>\n\
-    <description>\n\
-    <![CDATA[\n\
-    <p>IP: <a href="http://tools.whois.net/whoisbyip/$ip";>$ip</a> ORPort: $orport DirPort: $dirport</p>\n\
-    <p>Bandwidth: $bandwidth</p>\n\
-    <p>Flags: $flags</p>\n\
-    <p>Uptime: $uptime</p>\n\
-    <p>Contact: $contact</p>\n\
-    <p>Policy: $policy</p>\n\
-    <p>Fingerprint: <a href="http://torstatus.blutmagie.de/router_detail.php?FP=$fingerprint";>$prettyFingerprint</a></p>\n\
-    <p>Version: $version</p>\n\
-    ]]>\n\
-    </description>\n\
-    <styleUrl>$styleUrl</styleUrl>\n\
-    <Point>\n\
-        <coordinates>$longitude,$latitude</coordinates>\n\
-    </Point>\n\
-    </Placemark>\n\
-    ')
-        
-kmlBody = ()
-
-def generateFolder(name, styleUrl, relays):
-    group = '<Folder>\n<name>%s</name>\n' % name
-    for fingerprint,relay in relays.items():
-        # for displaying: pretty fingerprint in blocks of four, uppercase       
-        relay['prettyFingerprint'] = " ".join(filter(None, re.split('(\w{4})', fingerprint.upper())))
-        relay['styleUrl'] = styleUrl
-        placemark = placemarkTemplate.safe_substitute(relay)
-        group = group + placemark
-    group = group + "\n</Folder>"
-    return group
-    
-kmlBody = generateFolder("%s Fast Exits (>= 1MB/s)" % len(exitFastRelays), "#exitFast", exitFastRelays)
-kmlBody = kmlBody + generateFolder("%s Exits" % len(exitRelays), "#exit", exitRelays)
-kmlBody = kmlBody + generateFolder("%s Fast stable nodes (>= 1MB/s)" % len(stableFastRelays), "#stableFast", stableFastRelays)
-kmlBody = kmlBody + generateFolder("%s Stable nodes" % len(stableRelays), "#stable", stableRelays)
-kmlBody = kmlBody + generateFolder("%s Other" % len(otherRelays), "#other", otherRelays)
-
-kml = open('tormap.kml', 'w')
-
-kmlHeader = (
-    '<?xml version="1.0" encoding="UTF-8"?>\n'
-    '<kml xmlns="http://www.opengis.net/kml/2.2"; xmlns:gx="http://www.google.com/kml/ext/2.2"; xmlns:kml="http://www.opengis.net/kml/2.2"; xmlns:atom="http://www.w3.org/2005/Atom";>\n'
-    '<Document>\n'
-    '   <name>Tor relays</name>\n'
-    '   <Style id="exit">\n'
-    '       <IconStyle>\n'
-    '           <Icon>\n'
-    '               <href>http://maps.google.com/mapfiles/kml/paddle/grn-blank.png</href>\n'
-    '           </Icon>\n'
-    '       </IconStyle>\n'
-    '   </Style>\n'
-    '   <Style id="exitFast">\n'
-    '       <IconStyle>\n'
-    '           <Icon>\n'
-    '               <href>http://maps.google.com/mapfiles/kml/paddle/red-stars.png</href>\n'
-    '           </Icon>\n'
-    '       </IconStyle>\n'
-    '   </Style>\n'
-    '   <Style id="stable">\n'
-    '       <IconStyle>\n'
-    '           <Icon>\n'
-    '               <href>http://maps.google.com/mapfiles/kml/paddle/ylw-blank.png</href>\n'
-    '           </Icon>\n'
-    '       </IconStyle>\n'
-    '   </Style>\n'
-    '   <Style id="stableFast">\n'
-    '       <IconStyle>\n'
-    '           <Icon>\n'
-    '               <href>http://maps.google.com/mapfiles/kml/paddle/ylw-stars.png</href>\n'
-    '           </Icon>\n'
-    '       </IconStyle>\n'
-    '   </Style>\n'
-    '   <Style id="other">\n'
-    '       <IconStyle>\n'
-    '           <Icon>\n'
-    '               <href>http://maps.google.com/mapfiles/kml/paddle/wht-blank.png</href>\n'
-    '           </Icon>\n'
-    '       </IconStyle>\n'
-    '   </Style>\n'
-    )
- 
-kmlFooter = ('</Document>\n'
-             '</kml>\n')
- 
-kml.write(kmlHeader)
-kml.write(kmlBody)
-kml.write(kmlFooter)
-kml.close()
\ No newline at end of file
+
+f = open("nodes.csv", "w",newline='\n')
+csvf = csv.writer(f,dialect='excel')
+csvf.writerow(list(allRelays.popitem()[1])) #write the first row
+for item in allRelays.values():
+    csvf.writerow(list(item.values()))
+
+f.close()
+exit()
\ No newline at end of file

Attachment: signature.asc
Description: PGP signature

_______________________________________________
tor-talk mailing list
tor-talk@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-talk