[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r18338: {torflow} Ladies and gentlemen, we diff javascript. This is just a pro (torflow/trunk/NetworkScanners)
Author: mikeperry
Date: 2009-01-30 05:22:02 -0500 (Fri, 30 Jan 2009)
New Revision: 18338
Modified:
torflow/trunk/NetworkScanners/soatstats.py
Log:
Ladies and gentlemen, we diff javascript. This is just a
prototype I was running on my existing results. Checking it
in because I'm about to refactor all of it and want to have a
working copy on record.
Modified: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py 2009-01-30 03:40:20 UTC (rev 18337)
+++ torflow/trunk/NetworkScanners/soatstats.py 2009-01-30 10:22:02 UTC (rev 18338)
@@ -15,6 +15,47 @@
import libsoat
from libsoat import *
+sys.path.append("../")
+from TorCtl.TorUtil import *
+
+sys.path.append("./libs/pypy-svn/")
+import pypy.rlib.parsing.parsing
+import pypy.lang.js.jsparser
+
+attrs_with_raw_script = [
+'onabort', 'onactivate', 'onafterprint', 'onafterupdate',
+'onattrmodified', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut',
+'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint',
+'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'onbroadcast',
+'oncellchange', 'onchange', 'oncharacterdatamodified', 'onclick', 'onclose',
+'oncommand', 'oncommandupdate', 'oncontextmenu', 'oncontrolselect', 'oncopy',
+'oncut', 'ondataavaible', 'ondataavailable', 'ondatasetchanged',
+'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragdrop',
+'ondragend', 'ondragenter', 'ondragexit', 'ondraggesture', 'ondragleave',
+'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate',
+'onfilterchange', 'onfilterupdate', 'onfinish', 'onfocus', 'onfocusin',
+'onfocusout', 'onhelp', 'oninput', 'onkeydown', 'onkeypress', 'onkeyup',
+'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter',
+'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
+'onmousewheel', 'onmove', 'onmoveend', 'onmoveout', 'onmovestart',
+'onnodeinserted', 'onnodeinsertedintodocument', 'onnoderemoved',
+'onnoderemovedfromdocument', 'onoverflowchanged', 'onpaint', 'onpaste',
+'onpopupHidden', 'onpopupHiding', 'onpopupShowing', 'onpopupShown',
+'onpropertychange', 'onreadystatechange', 'onreset', 'onresize',
+'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete',
+'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange',
+'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onsubtreemodified',
+'ontext', 'onunderflow', 'onunload'
+]
+attrs_to_check = ['background', 'cite', 'classid', 'codebase', 'data',
+'longdesc', 'profile', 'src', 'style', 'usemap']
+attrs_to_check.extend(attrs_with_raw_script)
+attrs_to_check_map = {}
+for __a in attrs_to_check: attrs_to_check_map[__a]=1
+attrs_with_raw_script_map = {}
+for __a in attrs_with_raw_script: attrs_with_raw_script_map[__a]=1
+
+
class ResultCount:
def __init__(self, type):
self.type = type
@@ -103,15 +144,20 @@
if result.__class__.__name__ == "HtmlTestResult":
if not result.tags_old or not result.tags or not result.exit_tags:
continue
- new_vs_old = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()),
- BeautifulSoup(open(result.tags_old,
- "r").read()))
- old_vs_new = SoupDiffer(BeautifulSoup(open(result.tags_old, "r").read()),
- BeautifulSoup(open(result.tags,
- "r").read()))
- new_vs_tor = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()),
- BeautifulSoup(open(result.exit_tags,
- "r").read()))
+ print result.exit_node
+
+ print result.tags
+ print result.tags_old
+ print result.exit_tags
+
+ new_soup = BeautifulSoup(open(result.tags, "r").read())
+ old_soup = BeautifulSoup(open(result.tags_old, "r").read())
+ tor_soup = BeautifulSoup(open(result.exit_tags, "r").read())
+
+ new_vs_old = SoupDiffer(new_soup, old_soup)
+ old_vs_new = SoupDiffer(old_soup, new_soup)
+ new_vs_tor = SoupDiffer(new_soup, tor_soup)
+
changed_tags = {}
changed_attributes = {}
# I'm an evil man and I'm going to CPU hell..
@@ -159,7 +205,101 @@
if new_vs_tor.changed_content() and not changed_content:
false_positive = False
+
+ def ast_recurse(ast, map):
+ if not ast.symbol in map:
+ map[ast.symbol] = 1
+ else: map[ast.symbol] += 1
+ if isinstance(ast, pypy.rlib.parsing.tree.Nonterminal):
+ for child in ast.children:
+ ast_recurse(child, map)
+
+ def count_ast(map, tags):
+ for tag_l in tags:
+ for tag in tag_l.findAll():
+ did_parse = False
+ if tag.name == 'script':
+ for child in tag.childGenerator():
+ if isinstance(child, Tag):
+ plog("ERROR", "Script tag with subtag!")
+ else:
+ try:
+ did_parse = True
+ ast = pypy.lang.js.jsparser.parse(str(child))
+ ast_recurse(ast, map)
+ except (pypy.rlib.parsing.deterministic.LexerError, UnicodeDecodeError, pypy.rlib.parsing.parsing.ParseError):
+ plog("NOTICE", "Parse error on "+str(child))
+ if not "ParseError"+tag.name in map:
+ map["ParseError"+tag.name] = 1
+ else: map["ParseError"+tag.name] +=1
+
+ for attr in tag.attrs:
+ # XXX: %-encoding too
+ parse = ""
+ if attr[1].replace(" ","")[:11] == "javascript:":
+ split_at = attr[1].find(":")+1
+ parse = str(attr[1][split_at:])
+ elif attr[0] in attrs_with_raw_script_map:
+ parse = str(attr[1])
+ if not parse: continue
+ try:
+ did_parse = True
+ ast = pypy.lang.js.jsparser.parse(parse)
+ ast_recurse(ast, map)
+ except (pypy.rlib.parsing.deterministic.LexerError, UnicodeDecodeError, pypy.rlib.parsing.parsing.ParseError):
+ plog("NOTICE", "Parse error on "+parse+" in "+attr[0]+"="+attr[1])
+ if not "ParseError"+tag.name+attr[0] in map:
+ map["ParseError"+tag.name+attr[0]] = 1
+ else: map["ParseError"+attr[0]] +=1
+
+ if false_positive:
+ # Use http://codespeak.net/pypy/dist/pypy/lang/js/ to parse
+ # links and attributes that contain javascript
+
+ old_vs_new_cnt = {}
+ count_ast(old_vs_new_cnt, [old_soup])
+ new_vs_old_cnt = {}
+ count_ast(new_vs_old_cnt, [new_soup])
+
+ # for each changed tag, count all tree elements in a hash table.
+ # Then, compare the counts between the two fetches
+ # If any count changes, mark its count as -1
+ # Make sure the terminal counts of the tor fetch match
+ # except for the -1 terminals
+
+ for node in old_vs_new_cnt.iterkeys():
+ if node not in new_vs_old_cnt:
+ plog("INFO", "Javascript AST element "+node+" absent..")
+ new_vs_old_cnt[node] = 0
+ elif new_vs_old_cnt[node] != old_vs_new_cnt[node]:
+ plog("INFO", "Javascript AST count differs for "+node+": "+str(new_vs_old_cnt[node])+" vs "+str(old_vs_new_cnt[node]))
+ new_vs_old_cnt[node] = 0
+
+ for node in new_vs_old_cnt.iterkeys():
+ if node not in old_vs_new_cnt:
+ plog("INFO", "Javascript AST element "+node+" absent..")
+ new_vs_old_cnt[node] = 0
+
+ new_vs_tor_cnt = {}
+ count_ast(new_vs_tor_cnt, [tor_soup])
+
+ for node in new_vs_old_cnt.iterkeys():
+ if not new_vs_old_cnt[node]:
+ continue
+ if node not in new_vs_tor_cnt:
+ plog("ERROR", "Javascript AST element "+node+" absent from Tor.")
+ false_positive = False
+ elif new_vs_old_cnt[node] != new_vs_tor_cnt[node]:
+ plog("ERROR", "Javascript AST count differs for "+node+": "+str(new_vs_old_cnt[node])+" vs "+str(new_vs_tor_cnt[node]))
+ false_positive = False
+
+ for node in new_vs_tor_cnt.iterkeys():
+ if node not in new_vs_old_cnt:
+ plog("ERROR", "Javascript AST element "+node+" present only in Tor")
+ false_positive = False
+
+
print false_positive
print ""