[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r18350: {torflow} Fix a couple log messages and make PyPy existence optional. (torflow/trunk/NetworkScanners)
Author: mikeperry
Date: 2009-01-30 23:35:30 -0500 (Fri, 30 Jan 2009)
New Revision: 18350
Modified:
torflow/trunk/NetworkScanners/libsoat.py
torflow/trunk/NetworkScanners/soat.py
Log:
Fix a couple log messages and make PyPy existence optional.
Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py 2009-01-31 04:30:50 UTC (rev 18349)
+++ torflow/trunk/NetworkScanners/libsoat.py 2009-01-31 04:35:30 UTC (rev 18350)
@@ -20,11 +20,14 @@
sys.path.append("../")
from TorCtl.TorUtil import *
-sys.path.append("./libs/pypy-svn/")
-import pypy.rlib.parsing.parsing
-import pypy.lang.js.jsparser
+try:
+ sys.path.append("./libs/pypy-svn/")
+ import pypy.rlib.parsing.parsing
+ import pypy.lang.js.jsparser
+ HAVE_PYPY = True
+except ImportError:
+ HAVE_PYPY = False
-
# constants
TEST_SUCCESS = 0
@@ -402,10 +405,10 @@
f.write(str(self))
f.close()
-
class JSDiffer:
+ # XXX: Strip html comments from these strings
def __init__(self, js_string):
- self.ast_cnts = self.count_ast_elements(js_string)
+ if HAVE_PYPY: self.ast_cnts = self._count_ast_elements(js_string)
def _ast_recursive_worker(ast, ast_cnts):
if not ast.symbol in ast_cnts:
@@ -416,7 +419,7 @@
JSDiffer._ast_recursive_worker(child, ast_cnts)
_ast_recursive_worker = Callable(_ast_recursive_worker)
- def count_ast_elements(self, js_string, name="global"):
+ def _count_ast_elements(self, js_string, name="global"):
ast_cnts = {}
try:
ast = pypy.lang.js.jsparser.parse(js_string)
@@ -456,11 +459,15 @@
return False
def prune_differences(self, other_string):
- other_cnts = self.count_ast_elements(other_string)
+ if not HAVE_PYPY: return
+ other_cnts = self._count_ast_elements(other_string)
self._difference_pruner(other_cnts)
def contains_differences(self, other_string):
- other_cnts = self.count_ast_elements(other_string)
+ if not HAVE_PYPY:
+ plog("NOTICE", "PyPy import not present. Not diffing javascript")
+ return False
+ other_cnts = self._count_ast_elements(other_string)
return self._difference_checker(other_cnts)
class JSSoupDiffer(JSDiffer):
@@ -477,7 +484,7 @@
return ret_cnts
_add_cnts = Callable(_add_cnts)
- def count_ast_elements(self, soup, name="Soup"):
+ def _count_ast_elements(self, soup, name="Soup"):
ast_cnts = {}
for tag in soup.findAll():
if tag.name == 'script':
@@ -485,7 +492,7 @@
if isinstance(child, Tag):
plog("ERROR", "Script tag with subtag!")
else:
- tag_cnts = JSDiffer.count_ast_elements(self, str(child), tag.name)
+ tag_cnts = JSDiffer._count_ast_elements(self, str(child), tag.name)
ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
for attr in tag.attrs:
# hrmm.. %-encoding too? Firefox negs on it..
@@ -496,14 +503,18 @@
elif attr[0] in attrs_with_raw_script_map:
parse = str(attr[1])
if not parse: continue
- tag_cnts = JSDiffer.count_ast_elements(self,parse,tag.name+":"+attr[0])
+ tag_cnts = JSDiffer._count_ast_elements(self,parse,tag.name+":"+attr[0])
ast_cnts = JSSoupDiffer._add_cnts(tag_cnts, ast_cnts)
return ast_cnts
def prune_differences(self, other_soup):
- other_cnts = self.count_ast_elements(other_soup)
+ if not HAVE_PYPY: return
+ other_cnts = self._count_ast_elements(other_soup)
self._difference_pruner(other_cnts)
def contains_differences(self, other_soup):
- other_cnts = self.count_ast_elements(other_soup)
+ if not HAVE_PYPY:
+ plog("NOTICE", "PyPy import not present. Not diffing javascript")
+ return False
+ other_cnts = self._count_ast_elements(other_soup)
return self._difference_checker(other_cnts)
Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py 2009-01-31 04:30:50 UTC (rev 18349)
+++ torflow/trunk/NetworkScanners/soat.py 2009-01-31 04:35:30 UTC (rev 18350)
@@ -569,8 +569,10 @@
self.tests_run += 1
# XXX: Set referrer to address for subsequent fetches
# XXX: Set referrer to random or none for initial fetch
+ # XXX: Watch for spider-traps! (ie mutually sourcing iframes)
+ # Keep a trail log for this test and check for loops
address = random.choice(self.targets)
-
+
self.fetch_queue.put_nowait(("html", address))
while not self.fetch_queue.empty():
(test, url) = self.fetch_queue.get_nowait()
@@ -613,7 +615,6 @@
plog("ERROR", self.proto+" 3-way failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
def _add_recursive_targets(self, soup, orig_addr):
- # XXX: Watch for spider-traps! (ie mutually sourcing iframes)
# Only pull at most one filetype from the list of 'a' links
targets = []
got_type = {}
@@ -635,9 +636,10 @@
for a in t.attrs:
if str(a[0]) == "type" and str(a[1]) in link_script_types:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
+ plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
else:
targets.append(("js", urlparse.urljoin(orig_addr, attr_tgt)))
- plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
+ plog("NOTICE", "Adding js "+str(t.name)+" target: "+attr_tgt)
targets.append(("html", urlparse.urljoin(orig_addr, attr_tgt)))
elif str(t.name) == 'a':
if attr_name == "href":
@@ -723,9 +725,12 @@
plog("ERROR", "Javascript 3-way failure at "+exit_node+" for "+address)
return TEST_FAILURE
+
+ def check_html_notags(self, address):
+ pass
def check_html(self, address):
- # XXX: Check mimetype to decide what to do..
+ # TODO: Break this out into a check_html_notags that just does a sha check
''' check whether a http connection to a given address is molested '''
plog('INFO', 'Conducting an html test with destination ' + address)
@@ -733,9 +738,8 @@
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, tor_host, tor_port)
socket.socket = socks.socksocket
- # XXX: Wikipedia and others can give us 403.. So what do we do about that?
- # Probably should count the number of occurrances vs successful runs
- # then remove the url
+ # Wikipedia and others can give us 403.. So what do we do about that?
+  # Count the number of occurrences vs successful runs then remove the url
(pcode, pcontent) = http_request(address, self.tor_cookie_jar, self.headers)
# reset the connection to direct