[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r16585: {torflow} soat: usability improvements, remap event monitoring (in torflow/branches/gsoc2008: . data/soat data/soat/dns data/soat/dnsrebind data/soat/imap data/soat/pop data/soat/smtp)



Author: aleksei
Date: 2008-08-18 12:24:14 -0400 (Mon, 18 Aug 2008)
New Revision: 16585

Added:
   torflow/branches/gsoc2008/data/soat/dns/
   torflow/branches/gsoc2008/data/soat/dns/failed/
   torflow/branches/gsoc2008/data/soat/dns/inconclusive/
   torflow/branches/gsoc2008/data/soat/dns/successful/
   torflow/branches/gsoc2008/data/soat/dnsrebind/
   torflow/branches/gsoc2008/data/soat/dnsrebind/failed/
   torflow/branches/gsoc2008/data/soat/dnsrebind/inconclusive/
   torflow/branches/gsoc2008/data/soat/dnsrebind/successful/
   torflow/branches/gsoc2008/data/soat/imap/
   torflow/branches/gsoc2008/data/soat/imap/failed/
   torflow/branches/gsoc2008/data/soat/imap/inconclusive/
   torflow/branches/gsoc2008/data/soat/imap/successful/
   torflow/branches/gsoc2008/data/soat/pop/
   torflow/branches/gsoc2008/data/soat/pop/failed/
   torflow/branches/gsoc2008/data/soat/pop/inconclusive/
   torflow/branches/gsoc2008/data/soat/pop/successful/
   torflow/branches/gsoc2008/data/soat/smtp/
   torflow/branches/gsoc2008/data/soat/smtp/failed/
   torflow/branches/gsoc2008/data/soat/smtp/inconclusive/
   torflow/branches/gsoc2008/data/soat/smtp/successful/
Modified:
   torflow/branches/gsoc2008/soat.py
   torflow/branches/gsoc2008/soatstats.py
Log:
soat: usability improvements, remap event monitoring

Modified: torflow/branches/gsoc2008/soat.py
===================================================================
--- torflow/branches/gsoc2008/soat.py	2008-08-18 16:01:08 UTC (rev 16584)
+++ torflow/branches/gsoc2008/soat.py	2008-08-18 16:24:14 UTC (rev 16585)
@@ -18,9 +18,10 @@
 See also python soatstats.py
 '''
 
-__all__ = ["ExitNodeScanner", "load_wordlist", "get_urls"]
+__all__ = ["ExitNodeScanner", "DNSRebindScanner", "load_wordlist", "get_urls"]
 
 import commands
+import getopt
 import httplib
 import os
 import random
@@ -41,7 +42,7 @@
 from TorCtl.TorUtil import meta_port, meta_host, control_port, control_host, tor_port, tor_host
 from TorCtl.TorUtil import *
 from TorCtl.PathSupport import *
-from TorCtl.TorCtl import Connection
+from TorCtl.TorCtl import Connection, EventHandler
 
 from OpenSSL import *
 
@@ -71,6 +72,20 @@
     ["http", ExitPolicyRestriction('255.255.255.255', 80), "https", ExitPolicyRestriction('255.255.255.255', 443)]
 ]
 
+#
+# network prefixes (as bit strings) of non-public IPv4 address ranges
+# refer to: www.iana.org/assignments/ipv4-address-space, www.iana.org/assignments/multicast-addresses
+# 
+ipv4_nonpublic = [
+    '00000000',         # default route and its network: 0.0.0.0/8
+    '00001010',         # private 10.0.0.0/8
+    '01111111',         # loopback 127.0.0.0/8
+    '1010100111111110', # link-local 169.254.0.0/16
+    '101011000001',     # private 172.16.0.0/12
+    '1100000010101000', # private 192.168.0.0/16
+    '111'               # multicast & experimental 224.0.0.0/3
+]
+
 # tags and attributes to check in the http test: XXX these should be reviewed
 # See also: http://ha.ckers.org/xss.html
 # Note: the more we add, the greater the potential for false positives...  
@@ -106,6 +121,24 @@
             response = response[:-1]
         return response 
 
+class DNSRebindScanner(EventHandler):
+    ''' 
+    A tor control event handler extending TorCtl.EventHandler 
+    Monitors for REMAP events (see check_dns_rebind())
+    '''
+    def __init__(self, exit_node_scanner):
+        self.__soat = exit_node_scanner
+
+    def stream_status_event(self, event):
+        if event.status == 'REMAP':
+            octets = map(lambda x: int2bin(x).zfill(8), event.target_host.split('.'))
+            ipbin = ''.join(octets)
+            for network in ipv4_nonpublic:
+                if ipbin[:len(network)] == network:
+                    handler = DataHandler()
+                    result = DNSRebindTestResult(self.__soat.get_exit_node(), '', TEST_FAILURE)
+                    handler.saveResult(result)
+
 class ExitNodeScanner:
     ''' The scanner class '''
     def __init__(self):
@@ -207,7 +240,6 @@
     def set_new_exit(self, exit):
         ''' 
         tell metatroller to set the given node as the exit in the next circuit 
-        Note: currently not used
         '''
         plog('NOTICE', 'Trying to set ' + `exit` + ' as the exit for the next circuit')
         self.__meta.writeline("SETEXIT $"+exit)
@@ -276,7 +308,7 @@
         exit_node = self.get_exit_node()
         if exit_node == 0 or exit_node == '0' or not exit_node:
             plog('INFO', 'We had no exit node to test, skipping to the next test.')
-            return 0
+            return TEST_SUCCESS
 
         # an address representation acceptable for a filename 
         address_file = self.__datahandler.safeFilename(address[7:])
@@ -365,9 +397,9 @@
 
     def check_openssh(self, address):
         ''' check whether an openssh connection to a given address is molested '''
-        ssh = pyssh.Ssh('username', 'host', 22)
-        ssh.set_sshpath(pyssh.SSH_PATH)
-
+        # TODO
+        #ssh = pyssh.Ssh('username', 'host', 22)
+        #ssh.set_sshpath(pyssh.SSH_PATH)
         #response = self.ssh.sendcmd('ls')
         #print response
 
@@ -398,7 +430,7 @@
 
         # if we got no cert, there was an ssl error
         if cert == 0:
-            result = OpenSSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
+            result = SSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
             self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
@@ -435,7 +467,7 @@
         # if certs match, everything is ok
         if cert_pem == original_cert_pem:
             cert_file = ssl_certs_dir + address_file + '.pem'
-            result = OpenSSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
+            result = SSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
             self.__datahandler.saveResult(result)
             return TEST_SUCCESS
         
@@ -444,7 +476,7 @@
         original_cert_new = self.ssl_request(address)
         if original_cert_new == 0:
             plog('ERROR', 'Error getting the correct cert for ' + address)
-            result = OpenSSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
+            result = SSLTestResult(exit_node, address, 0, TEST_INCONCLUSIVE)
             self.__datahandler.saveResult(result)
             return TEST_INCONCLUSIVE
 
@@ -460,7 +492,7 @@
             cert_file.write(cert_pem)
             cert_file.close()
 
-            result = OpenSSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
+            result = SSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
             self.__datahandler.saveResult(result)
             return TEST_FAILURE
 
@@ -476,7 +508,7 @@
         # if certs match, everything is ok
         if cert_pem == original_cert_new_pem:
             cert_file = ssl_certs_dir + address_file + '.pem'
-            result = OpenSSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
+            result = SSLTestResult(exit_node, address, cert_file, TEST_SUCCESS)
             self.__datahandler.saveResult(result)
             return TEST_SUCCESS
 
@@ -488,19 +520,19 @@
         cert_file.write(cert_pem)
         cert_file.close()
 
-        result = OpenSSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
+        result = SSLTestResult(exit_node, address, cert_file_name, TEST_FAILURE)
         self.__datahandler.saveResult(result)
 
         return TEST_FAILURE
 
-    def check_smtp(self, address):
+    def check_smtp(self, address, port=''):
         ''' 
         check whether smtp + tls connection to a given address is molested
         this is done by going through the STARTTLS sequence and comparing server
         responses for the direct and tor connections
         '''
 
-        plog('INFO', 'Conducting an stmp test with destination ' + address)
+        plog('INFO', 'Conducting an smtp test with destination ' + address)
 
         defaultsocket = socket.socket
         socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
@@ -511,7 +543,7 @@
         ehlo2_reply = 0
 
         try:
-            s = smtplib.SMTP(address)
+            s = smtplib.SMTP(address, port)
             ehlo1_reply = s.ehlo()[0]
             if ehlo1_reply != 250:
                 raise smtplib.SMTPException('First ehlo failed')
@@ -526,11 +558,12 @@
         except socket.gaierror, e:
             plog('ERROR', 'A connection error occured while testing smtp at ' + address)
             plog('ERROR', e)
-            socket.socket = defaultsocket 
+            socket.socket = defaultsocket
             return TEST_INCONCLUSIVE
         except Exception, e:
             plog('ERROR','An error occured while testing smtp at ' + address)
             plog('ERROR', e)
+            return TEST_INCONCLUSIVE
         # reset the connection method back to direct
         socket.socket = defaultsocket 
 
@@ -538,7 +571,7 @@
         exit_node = self.get_exit_node()
         if exit_node == 0 or exit_node == '0':
             plog('INFO', 'We had no exit node to test, skipping to the next test.')
-            return 0
+            return TEST_SUCCESS
 
         # now directly
 
@@ -547,7 +580,7 @@
         ehlo2_reply_d = 0
 
         try:
-            s = smtplib.SMTP(address)
+            s = smtplib.SMTP(address, port)
             ehlo1_reply_d = s.ehlo()[0]
             if ehlo1_reply != 250:
                 raise smtplib.SMTPException('First ehlo failed')
@@ -555,20 +588,27 @@
             if not has_starttls_d:
                 raise smtplib.SMTPException('It seems that the server doesn\'t support starttls')
             s.starttls()
-            ehlo2_reply = s.ehlo()[0]
-            if ehlo2_reply != 250:
+            ehlo2_reply_d = s.ehlo()[0]
+            if ehlo2_reply_d != 250:
                 raise smtplib.SMTPException('Second ehlo failed')
         except Exception, e:
             plog('ERROR', 'An error occurred while testing smtp at ' + address)
             plog('ERROR', e)
+            return TEST_INCONCLUSIVE
 
+        print ehlo1_reply, ehlo1_reply_d, has_starttls, has_starttls_d, ehlo2_reply, ehlo2_reply_d
+
         # compare
         if ehlo1_reply != ehlo1_reply_d or has_starttls != has_starttls_d or ehlo2_reply != ehlo2_reply_d:
+            result = SMTPTestResult(exit_node, address, TEST_FAILURE)
+            self.__datahandler.saveResult(result)
             return TEST_FAILURE
 
+        result = SMTPTestResult(exit_node, address, TEST_SUCCESS)
+        self.__datahandler.saveResult(result)
         return TEST_SUCCESS
 
-    def check_pop(self, address):
+    def check_pop(self, address, port=''):
         ''' 
         check whether a pop + tls connection to a given address is molested 
         it is implied that the server reads/sends messages compliant with RFC1939 & RFC2449
@@ -576,6 +616,9 @@
 
         plog('INFO', 'Conducting a pop test with destination ' + address)
 
+        if not port:
+            port = 110
+
         defaultsocket = socket.socket
         socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
         socket.socket = socks.socksocket
@@ -586,7 +629,7 @@
         tls_succeeded = None
 
         try:
-            pop = Client(address, 110)
+            pop = Client(address, port)
         
             # read the server greeting
             server_greeting = pop.readline()
@@ -645,6 +688,12 @@
         # reset the connection to default
         socket.socket = defaultsocket
 
+        # check whether the test was valid at all
+        exit_node = self.get_exit_node()
+        if exit_node == 0 or exit_node == '0':
+            plog('INFO', 'We had no exit node to test, skipping to the next test.')
+            return TEST_SUCCESS
+
         # do the same for the direct connection
 
         capabilities_ok_d = False
@@ -653,7 +702,7 @@
         tls_succeeded_d = None
 
         try:
-            pop = Client(address, 110)
+            pop = Client(address, port)
         
             # read the server greeting
             server_greeting = pop.readline()
@@ -711,17 +760,24 @@
         # compare
         if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
                 tls_started != tls_started_d or tls_suceeded != tls_succeeded_d):
+            result = POPTestResult(exit_node, address, TEST_FAILURE)
+            self.__datahandler.saveResult(result)
             return TEST_FAILURE
         
+        result = POPTestResult(exit_node, address, TEST_SUCCESS)
+        self.__datahandler.saveResult(result)
         return TEST_SUCCESS
 
-    def check_imap(self, address):
+    def check_imap(self, address, port=''):
         ''' 
         check whether an imap + tls connection to a given address is molested 
         it is implied that the server reads/sends messages compliant with RFC3501
         ''' 
         plog('INFO', 'Conducting an imap test with destination ' + address)
 
+        if not port:
+            port = 143
+
         defaultsocket = socket.socket
         socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
         socket.socket = socks.socksocket
@@ -731,7 +787,7 @@
         tls_started = None
         tls_succeeded = None
         try:
-            imap = Client(address, 143)
+            imap = Client(address, port)
 
             # read server greeting
             server_greeting = imap.readline()
@@ -781,13 +837,19 @@
             return TEST_INCONCLUSIVE
         socket.socket = defaultsocket 
 
+        # check whether the test was valid at all
+        exit_node = self.get_exit_node()
+        if exit_node == 0 or exit_node == '0':
+            plog('INFO', 'We had no exit node to test, skipping to the next test.')
+            return TEST_SUCCESS
+
         # do the same for the direct connection
         capabilities_ok_d = None
         starttls_present_d = None
         tls_started_d = None
         tls_succeeded_d = None
         try:
-            imap = Client(address, 143)
+            imap = Client(address, port)
 
             # read server greeting
             server_greeting = imap.readline()
@@ -837,18 +899,27 @@
         # compare
         if (capabilities_ok != capabilities_ok_d or starttls_present != starttls_present_d or 
             tls_started != tls_started_d or tls_succeeded != tls_succeeded_d):
+            result = IMAPTestResult(exit_node, address, TEST_FAILURE)
+            self.__datahandler.saveResult(result)
             return TEST_FAILURE
 
+        result = IMAPTestResult(exit_node, address, TEST_SUCCESS)
+        self.__datahandler.saveResult(result)
         return TEST_SUCCESS
 
     def check_dns(self, address):
-        ''' A basic comparison DNS test. '''
+        ''' A basic comparison DNS test. Rather unreliable. '''
         # TODO Spawns a lot of false positives (for ex. doesn't work for google.com). 
         plog('INFO', 'Conducting a basic dns test for destination ' + address)
 
-        # this should be replaced
-        ip = commands.getoutput("tor-resolve " + address)
+        ip = tor_resolve(address)
 
+        # check whether the test was valid at all
+        exit_node = self.get_exit_node()
+        if exit_node == 0 or exit_node == '0':
+            plog('INFO', 'We had no exit node to test, skipping to the next test.')
+            return TEST_SUCCESS
+
         ips_d = Set([])
         try:
             results = socket.getaddrinfo(address,None)
@@ -859,10 +930,23 @@
             return TEST_INCONCLUSIVE
 
         if ip in ips_d:
+            result = DNSTestResult(exit_node, address, TEST_SUCCESS)
             return TEST_SUCCESS
         else:
-            return TEST_FAILURE # might also do a direct connection reverse lookup
+            plog('ERROR', 'The basic DNS test suspects ' + exit_node + ' to be malicious.')
+            result = DNSTestResult(exit_node, address, TEST_FAILURE)
+            return TEST_FAILURE
 
+    def check_dns_rebind(self):
+        ''' 
+        A DNS-rebind attack test that runs in the background and monitors REMAP events
+        The test makes sure that external hosts are not resolved to private addresses    
+        '''
+        plog('INFO', 'Monitoring REMAP events for weirdness')
+        self.__dnshandler = DNSRebindScanner(self)
+        self.__control.set_event_handler(self.__dnshandler)
+        self.__control.set_events([TorCtl.EVENT_TYPE.STREAM], True)
+
     def http_request(self, address):
         ''' perform a http GET-request and return the content received '''
         request = 0
@@ -918,7 +1002,6 @@
         # return the cert
         return c.get_peer_certificate()
 
-
 # some helpful methods
 
 def load_wordlist(file):
@@ -958,6 +1041,7 @@
 
             # search google for relevant pages
             # note: google only accepts requests from idenitified browsers
+            # TODO handle the case when google refuses to give us any more results
             host = 'www.google.com'
             params = urllib.urlencode({'q' : query})
             headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0'}
@@ -995,117 +1079,221 @@
          
     return list(Set(urllist))
 
+def tor_resolve(address):
+    ''' performs a DNS query explicitly via tor '''
+    return commands.getoutput("tor-resolve " + address)
+
+def int2bin(n):
+    '''
+    simple decimal -> binary conversion, needed for comparing IP addresses 
+    '''
+    n = int(n)
+    if n < 0:
+        raise ValueError, "Negative values are not accepted."
+    elif n == 0:
+        return '0'
+    else:
+        bin = ''
+        while n > 0:
+            bin += str(n % 2)
+            n = n >> 1
+        return bin[::-1]
 #
 # main logic
 #
 def main(argv):
-    scanner = ExitNodeScanner()
-    
-    # 
-    # 1) consistency test
-    #
+    # make sure we have something to test for
+    if len(argv) < 2:
+        print ''
+        print 'Please provide at least one test option:'
+        print '--ssl (~works)'
+        print '--http (gives some false positives)'
+        print '--ssh (doesn\'t work yet)'
+        print '--smtp (~works)'
+        print '--pop (~works)'
+        print '--imap (~works)'
+        print '--dns (a basic test, not really reliable)'
+        print '--dnsrebind (works with the ssl test)'
+        print '--policies (~works)'
+        print ''
+        sys.exit(0)
 
-    # scanner.check_all_exits_port_consistency()
+    opts = ['ssl','http','ssh','smtp','pop','imap','dns','dnsrebind','policies']
+    flags, trailer = getopt.getopt(argv[1:], [], opts)
     
-    #
-    # 2) test for checking yet unchecked nodes
-    # XXX use SETEXIT systematically, after 'all nodes' have been tested, just continue with NEWEXIT
-    #
+    # get specific test types
+    do_ssl = ('--ssl','') in flags
+    do_http = ('--http','') in flags
+    do_ssh = ('--ssh','') in flags
+    do_smtp = ('--smtp','') in flags
+    do_pop = ('--pop','') in flags
+    do_imap = ('--imap','') in flags
+    do_dns_basic = ('--dns','') in flags
+    do_dns_rebind = ('--dnsrebind','') in flags
+    do_consistency = ('--policies','') in flags
 
     # load the wordlist to search for sites lates on
     wordlist = load_wordlist(wordlist_file)
-    
-    # get the total number of nodes for ports
-    ssl_nodes = scanner.get_nodes_for_port(443)
-    http_nodes = scanner.get_nodes_for_port(80)
-    #ssh_nodes = scanner.get_nodes_for_port(22)
 
-    ssl_nodes_n = len(ssl_nodes)
-    http_nodes_n = len(http_nodes)
-    # ssh_nodes_n = len(ssh_nodes)
+    # initiate the scanner
+    scanner = ExitNodeScanner()
 
-    # lists of addresses (generated later with get_urls)
-    ssl_urls = []
-    http_urls = []
-    # ssh_urls = []
+    # initiate the passive dns rebind attack monitor
+    if do_dns_rebind:
+        scanner.check_dns_rebind()
 
-    # test terminating conditions for somewhat ok network coverage
-    ssl_done = False
-    http_done = False
-    ssh_done = True
+    # check for sketchy exit policies
+    if do_consistency:
+        scanner.check_all_exits_port_consistency()
 
-    # get some semi-random test targets
-    # http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
-    http_urls = ['http://math.ut.ee']
-    ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
+    # maybe only the consistency test was required
+    if not (do_ssl or do_http or do_ssh or do_smtp or do_pop or do_imap or do_dns):
+        plog('INFO', 'Done.')
+        sys.exit(0)
 
-    # get the starting rate of failed tests
-    # so we can determine when the test targets should be updated
-    http_fail = len(scanner.http_fail)
-    ssl_fail = len(scanner.ssl_fail)
+    # declare some variables and assign values if necessary
+    ssl_nodes = http_nodes = ssh_nodes = smtp_nodes = pop_nodes = imap_nodes = dns_nodes = []
+    ssl_nodes_n = http_nodes_n = ssh_nodes_n = smtp_nodes_n = pop_nodes_n = imap_nodes_n = dns_nodes_n = 0
+    ssl_urls = http_urls = ssh_urls = smtp_urls = pop_urls = imap_urls = dns_urls = []
+    ssl_fail = http_fail = ssh_fail = smtp_fail = pop_fail = imap_fail = imap_urls = 0
 
-    # try to test the exit node for each protocol needed, get a new node
+    if do_ssl:
+        ssl_nodes = scanner.get_nodes_for_port(443)
+        ssl_nodes_n = len(ssl_nodes)
+        # the search for https urls is yet too slow
+        ssl_urls =  ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] 
+        ssl_fail = len(scanner.ssl_fail)
+
+        if len(ssl_urls) == 0:
+            plog('ERROR', 'No urls specified for ssl testing.')
+            do_ssl = False
+
+    if do_http:
+        http_nodes = scanner.get_nodes_for_port(80)
+        http_nodes_n = len(http_nodes)
+        http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+        http_fail = len(scanner.http_fail)
+
+        if len(http_urls) == 0:
+            plog('ERROR', 'No urls specified for http testing.')
+            do_http = False
+
+    if do_ssh:
+        ssh_nodes = scanner.get_nodes_for_port(22)
+        ssh_nodes_n = len(ssh_nodes)
+        ssh_urls = []
+        ssh_fail = len(scanner.ssh_fail)
+
+        if len(ssl_urls) == 0:
+            plog('ERROR', 'No urls specified for ssh testing.')
+            do_ssh = False
+
+    if do_smtp:
+        smtp_urls = [('smtp.gmail.com','587')]
+
+        if len(smtp_urls) == 0:
+            plog('ERROR', 'No urls specified for smtp testing.')
+            do_smtp = False
+
+    if do_pop:
+        pop_urls = []
+
+        if len(pop_urls) == 0:
+            plog('ERROR', 'No urls specified for pop testing.')
+            do_pop = False
+
+    if do_imap:
+        imap_urls = []
+
+        if len(imap_urls) == 0:
+            plog('ERROR', 'No urls specified for imap testing.')
+            do_imap = False
+
+    if do_dns_basic:
+        dns_urls = []
+
+        if len(dns_urls) == 0:
+            plog('ERROR', 'No urls specified for dns testing.')
+            do_dns = False
+
+    # start testing
     while 1:  
         
         # https test  
-        if not ssl_done:
-            # XXX Uncomment this to try using SETEXIT
-            current_exit = random.choice([x for x in ssl_nodes if ('$' + `x.idhex`) not in scanner.ssl_tested])
-            scanner.set_new_exit(current_exit.idhex)
+        if do_ssl:
+            candidates = [x for x in ssl_nodes if ('$' + `x.idhex`) not in scanner.ssl_tested]
+            if len(candidates) > 0:
+                current_exit = random.choice(candidates)
+                scanner.set_new_exit(current_exit.idhex)
             
-            # Uncomment this to try NEWNYM after SETEXIT
             scanner.get_new_circuit()
-
             ssl_site = random.choice(ssl_urls)
             scanner.check_openssl(ssl_site)
+            
             ssl_tested_n = len(scanner.ssl_tested)
-            plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes_n`
-                + ' (~' + `((ssl_tested_n * 100) / ssl_nodes_n)` + '%)')
-            if ssl_tested_n >= ssl_nodes_n:
-                ssl_done = True
+            if ssl_nodes_n > ssl_tested_n:
+                plog('INFO', 'Nodes ssl-tested: ' + `ssl_tested_n` + '/' + `ssl_nodes_n`
+                    + ' (~' + `((ssl_tested_n * 100) / ssl_nodes_n)` + '%)')
+        
         # http test
-        if not http_done:
-            # XXX Uncomment this to try using SETEXIT
-            current_exit = random.choice([x for x in http_nodes if ('$' + `x.idhex`) not in scanner.http_tested])
-            scanner.set_new_exit(current_exit.idhex)
+        if do_http:
+            candidates = [x for x in http_nodes if ('$' + `x.idhex`) not in scanner.http_tested]
+            if len(candidates) > 0 :
+                current_exit = random.choice(candidates)
+                scanner.set_new_exit(current_exit.idhex)
             
-            # XXX Uncomment this to try NEWNYM after SETEXIT
             scanner.get_new_circuit()
-
             http_site = random.choice(http_urls)
             scanner.check_http(http_site)
+
             http_tested_n = len(scanner.http_tested)
-            plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes_n`
-                + ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
-            if http_tested_n >= http_nodes_n:
-                http_done = True
+            if http_nodes_n > http_tested_n:
+                plog('INFO', 'Nodes http-tested: ' + `http_tested_n` + '/' + `http_nodes_n`
+                    + ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
         
         # ssh test
-        '''
-        if not ssh_done:
+        if do_ssh:
+            candidates = [x for x in ssh_nodes if ('$' + `x.idhex`) not in scanner.ssh_tested]
+            if len(candidates) > 0:
+                current_exit = random.choice(candidates)
+                scanner.set_new_exit(current_exit.idhex)
+                
+            scanner.get_new_circuit()
             ssh_site = random.choice(ssh_urls)
-            scanner.check_openssh(ssh_site)
+            scanner.check_ssh(ssh_site)
+ 
             ssh_tested_n = len(scanner.ssh_tested)
-            plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes_n`
-                + '(~' + `((ssh_tested_n * 100) / ssh_nodes_n)` + '%')')
-            if ssh_tested_n >= ssh_nodes_n:
-                ssh_done = True
-        '''
+            if ssh_nodes_n > ssh_tested_n:
+                plog('INFO', 'Nodes ssh-tested: ' + `ssh_tested_n` + '/' + `ssh_nodes_n`
+                    + '(~' + `((ssh_tested_n * 100) / ssh_nodes_n)` + '%')
 
-        # check whether we're done, otherwise get a new circuit
-        if ssl_done and http_done and ssh_done:
-            plog('INFO','Wow! We have tested the whole tor network. Check soatstats.py for results')
-            break
+        # smtp test
+        if do_smtp:
+            scanner.get_new_circuit()
+            smtp_site = random.choice(smtp_urls)
+            scanner.check_smtp(smtp_site[0], smtp_site[1])
 
+        # pop test
+        if do_pop:
+            scanner.get_new_circuit()
+            pop_site = random.choice(pop_urls)
+            scanner.check_pop(pop_site[0], pop_site[1])
+
+        # imap test
+        if do_imap:
+            scanner.get_new_circuit()
+            imap_site = random.choice(imap_urls)
+            scanner.check_imap(imap_site[0], imap_site[1])
+
+        #
+        # managing url lists
         # if we've been having too many false positives lately, get a new target list
-        if len(scanner.http_fail) - http_fail >= len(http_urls):
+        # 
+
+        if do_http and len(scanner.http_fail) - http_fail >= len(http_urls):
             http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
             http_fail = len(scanner.http_fail)
-
-        # if len(scanner.ssl_fail) - ssl_fail >= len(ssl_urls):
-        #     ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm']
-        #     ssl_fail = len(scanner.ssl_fail)
-
+        
 #
 # initiate the program
 #

Modified: torflow/branches/gsoc2008/soatstats.py
===================================================================
--- torflow/branches/gsoc2008/soatstats.py	2008-08-18 16:01:08 UTC (rev 16584)
+++ torflow/branches/gsoc2008/soatstats.py	2008-08-18 16:24:14 UTC (rev 16585)
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+import dircache
 import operator
 import os
 import pickle
@@ -15,33 +16,17 @@
 # data locations
 
 data_dir = './data/soat/'
-
 ssl_certs_dir = data_dir + 'ssl/certs/'
-ssl_s_dir = data_dir + 'ssl/successful/'
-ssl_f_dir = data_dir + 'ssl/failed/'
-ssl_i_dir = data_dir + 'ssl/inconclusive/'
-ssl_result_dirs = [ssl_s_dir, ssl_f_dir, ssl_i_dir]
-
 http_tags_dir = data_dir + 'http/tags/'
-http_s_dir = data_dir + 'http/successful/'
-http_f_dir = data_dir + 'http/failed/'
-http_i_dir = data_dir + 'http/inconclusive/'
-http_result_dirs = [http_s_dir, http_f_dir, http_i_dir]
 
-ssh_s_dir = data_dir + 'ssh/successful/'
-ssh_f_dir = data_dir + 'ssh/failed/'
-ssh_i_dir = data_dir + 'ssh/inconclusive/'
-ssh_result_dirs = [ssh_s_dir, ssh_f_dir, ssh_i_dir]
+# constants
 
-result_dirs = [ssl_s_dir, ssl_f_dir, ssl_i_dir,
-               http_s_dir, http_f_dir, http_i_dir]
-
-# classes to use with pickle to dump test results into files
-
 TEST_SUCCESS = 0
 TEST_INCONCLUSIVE = 1
 TEST_FAILURE = 2
 
+# classes to use with pickle to dump test results into files
+
 class TestResult(object):
     ''' Parent class for all test result classes '''
     def __init__(self, exit_node, site, status):
@@ -50,23 +35,48 @@
         self.timestamp = time.time()
         self.status = status
 
-class OpenSSLTestResult(TestResult):
+class SSLTestResult(TestResult):
     ''' Represents the result of an openssl test '''
     def __init__(self, exit_node, ssl_site, cert_file, status):
-        super(OpenSSLTestResult, self).__init__(exit_node, ssl_site, status)
+        super(SSLTestResult, self).__init__(exit_node, ssl_site, status)
         self.cert = cert_file
 
 class HttpTestResult(TestResult):
-    ''' Represents the result of a web test '''
+    ''' Represents the result of a http test '''
     def __init__(self, exit_node, website, tag_prints, status):
         super(HttpTestResult, self).__init__(exit_node, website, status)
         self.tag_prints = tag_prints
 
-class OpenSSHTestResult(TestResult):
+class SSHTestResult(TestResult):
     ''' Represents the result of an ssh test '''
     def __init__(self, exit_node, ssh_site, status):
-        super(OpenSSHTestResult, self).__init__(exit_node, ssh_site, status)
+        super(SSHTestResult, self).__init__(exit_node, ssh_site, status)
 
+class DNSTestResult(TestResult):
+    ''' Represents the result of a dns test '''
+    def __init__(self, exit_node, dns_site, status):
+        super(DNSTestResult, self).__init__(exit_node, dns_site, status)
+
+class DNSRebindTestResult(TestResult):
+    ''' Represents the result of a dns rebind test '''
+    def __init__(self, exit_node, dns_rebind_site, status):
+        super(DNSRebindTestResult, self).__init__(exit_node, dns_rebind_site, status)
+
+class SMTPTestResult(TestResult):
+    ''' Represents the result of an smtp test '''
+    def __init__(self, exit_node, smtp_site, status):
+        super(SMTPTestResult, self).__init__(exit_node, smtp_site, status)
+
+class IMAPTestResult(TestResult):
+    ''' Represents the result of an imap test '''
+    def __init__(self, exit_node, imap_site, status):
+        super(IMAPTestResult, self).__init__(exit_node, imap_site, status)
+
+class POPTestResult(TestResult):
+    ''' Represents the result of a pop test '''
+    def __init__(self, exit_node, pop_site, status):
+        super(POPTestResult, self).__init__(exit_node, pop_site, status)
+
 class DataHandler:
     ''' Class for saving and managing test result data '''
     def filterResults(self, results, 
@@ -76,11 +86,11 @@
 
         filters = []
         if not show_ssh:
-            filters.append(lambda x: not x.__class__.__name__ == 'OpenSSHTestResult')
+            filters.append(lambda x: not x.__class__.__name__ == 'SSHTestResult')
         if not show_ssl:
-            filters.append(lambda x: not x.__class__.__name__ == 'OpenSSLTestResult')
+            filters.append(lambda x: not x.__class__.__name__ == 'SSLTestResult')
         if not show_http:
-            filters.append(lambda x: not x.__class__.__name__ == 'HttpTestResult')
+            filters.append(lambda x: not x.__class__.__name__ == 'TestResult')
         if not show_good:
             filters.append(lambda x: not x.status == TEST_SUCCESS)
         if not show_bad:
@@ -103,29 +113,43 @@
 
     def getAll(self):
         ''' get all available results'''
-        return self.__getResults(result_dirs)
+        return self.__getResults(data_dir)
 
     def getSsh(self):
         ''' get results of ssh tests '''
-        return self.__getResults(ssh_result_dirs)
+        return self.__getResults(data_dir + 'ssh/')
         
     def getHttp(self):
         ''' get results of http tests '''
-        return self.__getResults(http_result_dirs)
+        return self.__getResults(data_dir + 'http/')
 
     def getSsl(self):
         ''' get results of ssl tests '''
-        return self.__getResults(ssl_result_dirs)
+        return self.__getResults(data_dir + 'ssl/')
 
-    def __getResults(self, dirs):
-        ''' gather results from files in given directories'''
+    def getSmtp(self):
+        ''' get results of smtp tests '''
+        return self.__getResults(data_dir + 'smtp/')
+
+    def getPop(self):
+        ''' get results of pop tests '''
+        return self.__getResults(data_dir + 'pop/')
+
+    def getImap(self):
+        ''' get results of imap tests '''
+        return self.__getResults(data_dir + 'imap/')
+
+    def __getResults(self, dir):
+        ''' 
+        recursively traverse the directory tree starting with dir
+        gather test results from files ending with .result
+        '''
         results = []
 
-        for dir in dirs:
-            files = os.listdir(dir)
+        for root, dirs, files in os.walk(dir):
             for file in files:
-                if file[-6:] == 'result':
-                    fh = open(dir + file,'r')
+                if file.endswith('result'):
+                    fh = open(os.path.join(root, file))
                     result = pickle.load(fh)
                     results.append(result)
 
@@ -142,52 +166,27 @@
 
     def saveResult(self, result):
         ''' generic method for saving test results '''
+        address = ''
         if result.__class__.__name__ == 'HttpTestResult':
-            self.__saveHttp(result)
-        elif result.__class__.__name__ == 'OpenSSHTestResult':
-            self.__saveSsh(result)
-        elif result.__class__.__name__ == 'OpenSSLTestResult':
-            self.__saveSsl(result)
+            address = self.safeFilename(result.site[7:])
+        elif result.__class__.__name__ == 'SSLTestResult':
+            address = self.safeFilename(result.site[8:])
+        elif 'TestResult' in result.__class__.__name__:
+            address = self.safeFilename(result.site)
+        else:
+            raise Exception, 'This doesn\'t seems to be a result instance.'
 
-    def __saveHttp(self, result):
-        ''' save a http result '''
-        dir = None
+        dir = data_dir + result.__class__.__name__[:-10].lower() + '/'
         if result.status == TEST_SUCCESS:
-            dir = http_s_dir
-        elif result.status == TEST_FAILURE:
-            dir = http_f_dir
-        elif result.status == TEST_INCONCLUSIVE:
-            dir = http_i_dir
-
-        address = self.safeFilename(result.site[7:])
-
-        if dir:
-            result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close() 
-
-
-    def __saveSsh(self, result):
-        ''' save an ssh result '''
-        pass
-
-    def __saveSsl(self, result):
-        ''' save an ssl result '''
-        dir = None
-        if result.status == TEST_SUCCESS:
-            dir = ssl_s_dir
-        elif result.status == TEST_FAILURE:
-            dir = ssl_f_dir
-        elif result.status == TEST_INCONCLUSIVE:
-            dir = ssl_i_dir
-
-        # an address representation acceptable for a filename (leave out the https:// and replace slashes)
-        address = self.safeFilename(result.site[8:])
-
-        if dir:
-            result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
-            pickle.dump(result, result_file)
-            result_file.close() 
+            dir += 'successful/'
+        if result.status == TEST_INCONCLUSIVE:
+            dir += 'inconclusive/'
+        if result.status == TEST_FAILURE:
+            dir += 'failed/'
+        
+        result_file = open(dir + `result.exit_node` + address + '.result', 'w')
+        pickle.dump(result, result_file)
+        result_file.close()
     
 #
 # Displaying stats on the console
@@ -235,13 +234,13 @@
             elif result.status == 2:
                 bad += 1
             
-            if result.__class__.__name__ == 'OpenSSHTestResult':
+            if result.__class__.__name__ == 'SSHTestResult':
                 sshSet.add(result.exit_node)
                 ssh += 1
             elif result.__class__.__name__ == 'HttpTestResult':
                 httpSet.add(result.exit_node)
                 http += 1
-            elif result.__class__.__name__ == 'OpenSSLTestResult':
+            elif result.__class__.__name__ == 'SSLTestResult':
                 sslSet.add(result.exit_node)
                 ssl += 1