[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r16441: {torflow} Fixed result saving issues. Less googling. Basic dns stuff. (torflow/branches/gsoc2008)



Author: aleksei
Date: 2008-08-05 22:50:47 -0400 (Tue, 05 Aug 2008)
New Revision: 16441

Modified:
   torflow/branches/gsoc2008/soat.py
   torflow/branches/gsoc2008/soatstats.py
Log:
Fixed result saving issues. Less googling. Basic dns stuff.

Modified: torflow/branches/gsoc2008/soat.py
===================================================================
--- torflow/branches/gsoc2008/soat.py	2008-08-05 23:33:43 UTC (rev 16440)
+++ torflow/branches/gsoc2008/soat.py	2008-08-06 02:50:47 UTC (rev 16441)
@@ -20,6 +20,7 @@
 
 __all__ = ["ExitNodeScanner", "load_wordlist", "get_urls"]
 
+import commands
 import httplib
 import os
 import random
@@ -158,12 +159,23 @@
         # get a data handler
         self.__datahandler = DataHandler()
 
-        # get the nodes tested so far
+        # get stats about previous runs
         plog('INFO', 'Loading the previous run stats')
-        self.ssh_tested = Set([x.exit_node for x in self.__datahandler.getSsh()])
-        self.http_tested = Set([x.exit_node for x in self.__datahandler.getHttp()])
-        self.ssl_tested = Set([x.exit_node for x in self.__datahandler.getSsl()])    
 
+        ssh_results = self.__datahandler.getSsh()
+        ssl_results = self.__datahandler.getSsl()
+        http_results = self.__datahandler.getHttp()
+
+        # get lists of tested nodes
+        self.ssh_tested = Set([x.exit_node for x in ssh_results])
+        self.http_tested = Set([x.exit_node for x in http_results])
+        self.ssl_tested = Set([x.exit_node for x in ssl_results])
+        
+        # get the number of failures
+        self.ssh_fail = [self.__datahandler.filterResults(ssh_results, show_ssh=True, show_bad=True)]
+        self.http_fail =  [self.__datahandler.filterResults(http_results, show_http=True, show_bad=True)]
+        self.ssl_fail = [self.__datahandler.filterResults(ssl_results, show_ssl=True, show_bad=True)]
+
         plog('INFO', 'ExitNodeScanner up and ready')
 
     def get_exit_node(self):
@@ -266,8 +278,8 @@
             plog('INFO', 'We had no exit node to test, skipping to the next test.')
             return 0
 
-        # an address representation acceptable for a filename (leave out the http:// and replace slashes)
-        address_file = address[7:].replace('/','_')
+        # an address representation acceptable for a filename 
+        address_file = self.__datahandler.safeFilename(address[7:])
 
         # if we have no content, we had a connection error
         if pcontent == 0:
@@ -365,8 +377,8 @@
         ''' check whether an https connection to a given address is molested '''
         plog('INFO', 'Conducting an ssl test with destination ' + address)
 
-        # an address representation acceptable for a filename (leave out the https:// and replace slashes)
-        address_file = address[8:].replace('/','_')
+        # an address representation acceptable for a filename 
+        address_file = self.__datahandler.safeFilename(address[8:])
 
         # get the cert via tor
 
@@ -829,6 +841,28 @@
 
         return TEST_SUCCESS
 
+    def check_dns(self, address):
+        ''' Check that the tor-resolve output for address is among its direct getaddrinfo() results. '''
+        # TODO Spawns a lot of false positives (e.g. it doesn't work for google.com).
+        plog('INFO', 'Conducting a basic dns test for destination ' + address)
+
+        # NOTE(review): address is concatenated unescaped into a shell command line - this should be replaced
+        ip = commands.getoutput("tor-resolve " + address)
+
+        ips_d = Set([])
+        try:
+            results = socket.getaddrinfo(address,None)
+            for result in results:
+                ips_d.add(result[4][0])
+        except Exception, e:
+            plog('ERROR', e)
+            return TEST_INCONCLUSIVE
+
+        if ip in ips_d:
+            return TEST_SUCCESS
+        else:
+            return TEST_FAILURE # might also do a direct connection reverse lookup
+
     def http_request(self, address):
         ''' perform a http GET-request and return the content received '''
         request = 0
@@ -1000,12 +1034,19 @@
     http_done = False
     ssh_done = True
 
-    # get some semi-random urls, try to test the exit node for each protocol needed, get a new node
-    while 1: 
+    # get some semi-random test targets
+    # http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+    http_urls = ['http://math.ut.ee']
+    ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
+
+    # get the starting rate of failed tests
+    # so we can determine when the test targets should be updated
+    http_fail = len(scanner.http_fail)
+    ssl_fail = len(scanner.ssl_fail)
+
+    # try to test the exit node for each protocol needed, get a new node
+    while 1:  
         
-        http_urls = get_urls(wordlist, protocol='http')
-        ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm'] # the search for https stuff is yet too slow
-        
         # https test  
         if not ssl_done:
             # XXX Uncomment this to try using SETEXIT
@@ -1038,6 +1079,7 @@
                 + ' (~' + `((http_tested_n * 100) / http_nodes_n)` + '%)')
             if http_tested_n >= http_nodes_n:
                 http_done = True
+        
         # ssh test
         '''
         if not ssh_done:
@@ -1054,10 +1096,16 @@
         if ssl_done and http_done and ssh_done:
             plog('INFO','Wow! We have tested the whole tor network. Check soatstats.py for results')
             break
-        else:
-            pass
-            scanner.get_new_circuit()
-            time.sleep(1)
+
+        # if we've been having too many false positives lately, get a new target list
+        if len(scanner.http_fail) - http_fail >= len(http_urls):
+            http_urls = get_urls(wordlist, protocol='http', results_per_type=10, g_results_per_page=20)
+            http_fail = len(scanner.http_fail)
+
+        # if len(scanner.ssl_fail) - ssl_fail >= len(ssl_urls):
+        #     ssl_urls = ['https://mail.google.com', 'https://addons.mozilla.org', 'https://www.fastmail.fm']
+        #     ssl_fail = len(scanner.ssl_fail)
+
 #
 # initiate the program
 #

Modified: torflow/branches/gsoc2008/soatstats.py
===================================================================
--- torflow/branches/gsoc2008/soatstats.py	2008-08-05 23:33:43 UTC (rev 16440)
+++ torflow/branches/gsoc2008/soatstats.py	2008-08-06 02:50:47 UTC (rev 16441)
@@ -70,8 +70,8 @@
 class DataHandler:
     ''' Class for saving and managing test result data '''
     def filterResults(self, results, 
-            show_ssh, show_http, show_ssl, 
-            show_good, show_bad, show_unsure):
+            show_ssh=False, show_http=False, show_ssl=False, 
+            show_good=False, show_bad=False, show_unsure=False):
         ''' filter results based on protocol and success level ''' 
 
         filters = []
@@ -131,6 +131,15 @@
 
         return results
 
+    def safeFilename(self, str):
+        '''
+        replace / ? : | * < > and backslashes with underscores, drop double
+        quotes, and return at most the first 200 characters of the result
+        '''
+        replaced = (str.replace('/','_').replace('\\','_').replace('?','_').replace(':','_').
+            replace('|','_').replace('*','_').replace('<','_').replace('>','_').replace('"',''))
+        return replaced[:200]
+
     def saveResult(self, result):
         ''' generic method for saving test results '''
         if result.__class__.__name__ == 'HttpTestResult':
@@ -150,8 +159,7 @@
         elif result.status == TEST_INCONCLUSIVE:
             dir = http_i_dir
 
-        # an address representation acceptable for a filename (leave out the http:// and replace slashes)
-        address = result.site[7:].replace('/','_') 
+        address = self.safeFilename(result.site[7:])
 
         if dir:
             result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')
@@ -174,7 +182,7 @@
             dir = ssl_i_dir
 
         # an address representation acceptable for a filename (leave out the https:// and replace slashes)
-        address = result.site[8:].replace('/','_') 
+        address = self.safeFilename(result.site[8:])
 
         if dir:
             result_file = open(dir + `result.exit_node` + '_' + address + '.result','w')