[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r18350: {torflow} Fix a couple log messages and make PyPy existence optional. (torflow/trunk/NetworkScanners)



Author: mikeperry
Date: 2009-01-30 23:35:30 -0500 (Fri, 30 Jan 2009)
New Revision: 18350

Modified:
   torflow/trunk/NetworkScanners/libsoat.py
   torflow/trunk/NetworkScanners/soat.py
Log:

Fix a couple log messages and make PyPy existence optional.



Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py	2009-01-31 04:30:50 UTC (rev 18349)
+++ torflow/trunk/NetworkScanners/libsoat.py	2009-01-31 04:35:30 UTC (rev 18350)
@@ -20,11 +20,14 @@
 sys.path.append("../")
 from TorCtl.TorUtil import *
 
-sys.path.append("./libs/pypy-svn/")
-import pypy.rlib.parsing.parsing
-import pypy.lang.js.jsparser
+try:
+  sys.path.append("./libs/pypy-svn/")
+  import pypy.rlib.parsing.parsing
+  import pypy.lang.js.jsparser
+  HAVE_PYPY = True
+except ImportError:
+  HAVE_PYPY = False
 
-
 # constants
 
 TEST_SUCCESS = 0
@@ -402,10 +405,10 @@
     f.write(str(self))
     f.close()
 
-
 class JSDiffer:
+  # XXX: Strip html comments from these strings
   def __init__(self, js_string):
-    self.ast_cnts = self.count_ast_elements(js_string)
+    if HAVE_PYPY: self.ast_cnts = self._count_ast_elements(js_string)
 
   def _ast_recursive_worker(ast, ast_cnts):
     if not ast.symbol in ast_cnts:
@@ -416,7 +419,7 @@
         JSDiffer._ast_recursive_worker(child, ast_cnts)
   _ast_recursive_worker = Callable(_ast_recursive_worker)
  
-  def count_ast_elements(self, js_string, name="global"):
+  def _count_ast_elements(self, js_string, name="global"):
     ast_cnts = {}
     try:
       ast = pypy.lang.js.jsparser.parse(js_string)
@@ -456,11 +459,15 @@
     return False
 
   def prune_differences(self, other_string):
-    other_cnts = self.count_ast_elements(other_string)
+    if not HAVE_PYPY: return
+    other_cnts = self._count_ast_elements(other_string)
     self._difference_pruner(other_cnts)
 
   def contains_differences(self, other_string):
-    other_cnts = self.count_ast_elements(other_string)
+    if not HAVE_PYPY:
+      plog("NOTICE", "PyPy import not present. Not diffing javascript")
+      return False
+    other_cnts = self._count_ast_elements(other_string)
     return self._difference_checker(other_cnts) 
 
 class JSSoupDiffer(JSDiffer):
@@ -477,7 +484,7 @@
     return ret_cnts
   _add_cnts = Callable(_add_cnts)
 
-  def count_ast_elements(self, soup, name="Soup"):
+  def _count_ast_elements(self, soup, name="Soup"):
     ast_cnts = {}
     for tag in soup.findAll():
       if tag.name == 'script':
@@ -485,7 +492,7 @@
           if isinstance(child, Tag):
             plog("ERROR", "Script tag with subtag!")
           else:
-            tag_cnts = JSDiffer.count_ast_elements(self, str(child), tag.name)
+            tag_cnts = JSDiffer._count_ast_elements(self, str(child), tag.name)
             ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
       for attr in tag.attrs:
         # hrmm.. %-encoding too? Firefox negs on it..
@@ -496,14 +503,18 @@
         elif attr[0] in attrs_with_raw_script_map:
           parse = str(attr[1])
         if not parse: continue
-        tag_cnts = JSDiffer.count_ast_elements(self,parse,tag.name+":"+attr[0])
+        tag_cnts = JSDiffer._count_ast_elements(self,parse,tag.name+":"+attr[0])
         ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
     return ast_cnts
 
   def prune_differences(self, other_soup):
-    other_cnts = self.count_ast_elements(other_soup)
+    if not HAVE_PYPY: return
+    other_cnts = self._count_ast_elements(other_soup)
     self._difference_pruner(other_cnts)
 
   def contains_differences(self, other_soup):
-    other_cnts = self.count_ast_elements(other_soup)
+    if not HAVE_PYPY:
+      plog("NOTICE", "PyPy import not present. Not diffing javascript")
+      return False
+    other_cnts = self._count_ast_elements(other_soup)
     return self._difference_checker(other_cnts) 

Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-01-31 04:30:50 UTC (rev 18349)
+++ torflow/trunk/NetworkScanners/soat.py	2009-01-31 04:35:30 UTC (rev 18350)
@@ -569,8 +569,10 @@
     self.tests_run += 1
     # XXX: Set referrer to address for subsequent fetches
     # XXX: Set referrer to random or none for initial fetch
+    # XXX: Watch for spider-traps! (ie mutually sourcing iframes)
+    # Keep a trail log for this test and check for loops
     address = random.choice(self.targets)
-    
+
     self.fetch_queue.put_nowait(("html", address))
     while not self.fetch_queue.empty():
       (test, url) = self.fetch_queue.get_nowait()
@@ -613,7 +615,6 @@
       plog("ERROR", self.proto+" 3-way failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
 
   def _add_recursive_targets(self, soup, orig_addr):
-    # XXX: Watch for spider-traps! (ie mutually sourcing iframes)
     # Only pull at most one filetype from the list of 'a' links
     targets = []
     got_type = {}
@@ -635,9 +636,10 @@
                 for a in t.attrs:
                   if str(a[0]) == "type" and str(a[1]) in link_script_types:
                     targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+                    plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
               else:
                 targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
-              plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
+                plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
               targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
             elif str(t.name) == 'a':
               if attr_name == "href":
@@ -723,9 +725,12 @@
       plog("ERROR", "Javascript 3-way failure at "+exit_node+" for "+address)
 
       return TEST_FAILURE
+  
+  def check_html_notags(self, address):
+    pass
 
   def check_html(self, address):
-    # XXX: Check mimetype to decide what to do..
+    # TODO: Break this out into a check_html_notags that just does a sha check
     ''' check whether a http connection to a given address is molested '''
     plog('INFO', 'Conducting an html test with destination ' + address)
 
@@ -733,9 +738,8 @@
     socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
     socket.socket = socks.socksocket
 
-    # XXX: Wikipedia and others can give us 403.. So what do we do about that?
-    # Probably should count the number of occurrances vs successful runs
-    # then remove the url
+    # Wikipedia and others can give us 403.. So what do we do about that?
+    # Count the number of occurrences vs successful runs then remove the url
     (pcode, pcontent) = http_request(address, self.tor_cookie_jar, self.headers)
 
     # reset the connection to direct