
[or-cvs] r18318: {torflow} Woo hoo! A false positive filter that works! Yay! Someone sh (torflow/trunk/NetworkScanners)



Author: mikeperry
Date: 2009-01-29 09:22:47 -0500 (Thu, 29 Jan 2009)
New Revision: 18318

Modified:
   torflow/trunk/NetworkScanners/libsoat.py
   torflow/trunk/NetworkScanners/soat.py
   torflow/trunk/NetworkScanners/soatstats.py
Log:

Woo hoo! A false positive filter that works! Yay! Someone
should verify this isn't too permissive. Also, the attributes to
filter for were updated by combining XSS filters from around
the internets.
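
In outline, the new filter treats a Tor-fetched page as dynamic rather than
tampered with when every tag and attribute that differs between the Tor copy
and a fresh direct fetch also differs between two direct fetches. A minimal
standalone sketch of that subset check (illustrative names, not the soat.py
API):

    # Tor-side changes are benign iff they are a subset of the changes seen
    # between two direct (non-Tor) fetches of the same URL. Each 'changed'
    # dict maps tag name -> set of attribute names that differed.
    def is_false_positive(changed_direct, changed_via_tor):
        for tag, attrs in changed_via_tor.items():
            if tag not in changed_direct:
                return False                   # tag changed only via Tor
            if not attrs.issubset(changed_direct[tag]):
                return False                   # attribute changed only via Tor
        return True                            # all changes also occur directly

    # A rotating ad image differs on every fetch, so it is not flagged:
    assert is_false_positive({'img': set(['src'])}, {'img': set(['src'])})
    # An injected onclick handler appears only via Tor, so it is flagged:
    assert not is_false_positive({'img': set(['src'])}, {'a': set(['onclick'])})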



Modified: torflow/trunk/NetworkScanners/libsoat.py
===================================================================
--- torflow/trunk/NetworkScanners/libsoat.py	2009-01-29 13:17:43 UTC (rev 18317)
+++ torflow/trunk/NetworkScanners/libsoat.py	2009-01-29 14:22:47 UTC (rev 18318)
@@ -8,6 +8,9 @@
 import pickle
 import sys
 import time
+import difflib
+sys.path.append("./libs")
+from BeautifulSoup.BeautifulSoup import BeautifulSoup, Tag
 
 import sets
 from sets import Set
@@ -40,6 +43,7 @@
 # Failed reasons
 FAILURE_EXITONLY = "FailureExitOnly"
 FAILURE_DYNAMICTAGS = "FailureDynamicTags" 
+FAILURE_DYNAMICBINARY = "FailureDynamicBinary" 
 FAILURE_COOKIEMISMATCH = "FailureCookieMismatch"
 
 # classes to use with pickle to dump test results into files
@@ -232,11 +236,10 @@
 
     for root, dirs, files in os.walk(dir):
       for file in files:
-        if file.endswith('result'):
+        if file[:-41].endswith('result'):
           fh = open(os.path.join(root, file))
           result = pickle.load(fh)
           results.append(result)
-
     return results
 
   def safeFilename(self, str):
@@ -272,3 +275,86 @@
     pickle.dump(result, result_file)
     result_file.close()
 
+class SoupDiffer:
+  """ Diff two soup tag sets, optionally writing diffs to outfile. """
+  def __init__(self, soup_old, soup_new):
+    self.soup_old = soup_old
+    self.soup_new = soup_new
+
+  def changed_tags(self):
+    """ Return a list of tags changed or added to soup_new as strings """
+    tags_old = sets.Set(map(str, 
+           [tag for tag in self.soup_old.findAll() if isinstance(tag, Tag)]))
+    tags_new = sets.Set(map(str, 
+           [tag for tag in self.soup_new.findAll() if isinstance(tag, Tag)]))
+    ret = list(tags_new - tags_old)
+    ret.sort()
+    return ret
+
+  def _get_attributes(self):
+    attrs_old = [tag.attrs for tag in self.soup_old.findAll()]
+    attrs_new = [tag.attrs for tag in self.soup_new.findAll()]
+    attr_old = []
+    for attr_list in attrs_old:
+      attr_old.extend(attr_list) 
+    attr_new = []
+    for attr_list in attrs_new:
+      attr_new.extend(attr_list)
+    return (attr_old, attr_new)
+    
+  def changed_attributes(self):
+    """ Return a list of attributes added to soup_new """
+    (attr_old, attr_new) = self._get_attributes()
+    ret = list(sets.Set(attr_new) - sets.Set(attr_old))
+    ret.sort()
+    return ret
+
+  def changed_content(self):
+    """ Return a list of tag contents changed in soup_new """
+    tags_old = sets.Set(map(str, 
+      [tag for tag in self.soup_old.findAll() if not isinstance(tag, Tag)]))
+    tags_new = sets.Set(map(str, 
+      [tag for tag in self.soup_new.findAll() if not isinstance(tag, Tag)]))
+    ret = list(tags_new - tags_old)
+    ret.sort()
+    return ret
+
+  def diff_tags(self):
+    tags_old = map(str, [tag for tag in self.soup_old.findAll() if isinstance(tag, Tag)])
+    tags_new = map(str, [tag for tag in self.soup_new.findAll() if isinstance(tag, Tag)])
+    tags_old.sort()
+    tags_new.sort()
+    diff = difflib.SequenceMatcher(None, tags_old, tags_new)
+    return diff
+
+  def diff_attributes(self):
+    (attr_old, attr_new) = self._get_attributes()
+    attr_old.sort()
+    attr_new.sort()
+    diff = difflib.SequenceMatcher(None, attr_old, attr_new)
+    return diff
+
+  def diff_content(self):
+    tags_old = list(sets.Set(map(str, 
+      [tag for tag in self.soup_old.findAll() if not isinstance(tag, Tag)])))
+    tags_new = list(sets.Set(map(str, 
+      [tag for tag in self.soup_new.findAll() if not isinstance(tag, Tag)])))
+    tags_old.sort()
+    tags_new.sort()
+    # SequenceMatcher needs indexable sequences, not Sets
+    diff = difflib.SequenceMatcher(None, tags_old, tags_new)
+    return diff
+
+  def __str__(self):
+    tags = self.changed_tags()
+    out = "Tags:\n"+"\n".join(tags)
+    attrs = self.changed_attributes()
+    out += "\n\nAttrs:\n"
+    for a in attrs:
+      out += a[0]+"="+a[1]+"\n"
+    content = self.changed_content()
+    out += "\n\nContent:\n"+"\n".join(map(str, content))
+    return out
+
+  def write_diff(self, outfile):
+    f = open(outfile, "w")
+    f.write(str(self))
+    f.close()
+ 
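
For reference, a hypothetical driver for the new class (assuming libsoat.py
is importable and its bundled BeautifulSoup path from this commit):

    import sys
    sys.path.append("./libs")
    from BeautifulSoup.BeautifulSoup import BeautifulSoup
    from libsoat import SoupDiffer     # assumption: libsoat.py on the path

    old = BeautifulSoup('<a href="/x">x</a>')
    new = BeautifulSoup('<a href="/x" onclick="alert(1)">x</a>')
    differ = SoupDiffer(old, new)
    print differ.changed_tags()        # tag strings added or altered in 'new'
    print differ.changed_attributes()  # (name, value) attribute pairs added
    print differ.changed_content()     # non-tag text content that changed
    differ.write_diff("page.diff")     # write the __str__ report to a file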

Modified: torflow/trunk/NetworkScanners/soat.py
===================================================================
--- torflow/trunk/NetworkScanners/soat.py	2009-01-29 13:17:43 UTC (rev 18317)
+++ torflow/trunk/NetworkScanners/soat.py	2009-01-29 14:22:47 UTC (rev 18318)
@@ -59,6 +59,7 @@
 from OpenSSL import *
 
 sys.path.append("./libs/")
+# XXX: Try to determine if we should be using MinimalSoup
 from BeautifulSoup.BeautifulSoup import BeautifulSoup, SoupStrainer, Tag
 from SocksiPy import socks
 import Pyssh.pyssh
@@ -82,7 +83,7 @@
 
 
 firefox_headers = {
-  'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1) Gecko/20061010 Firefox/2.0',
+  'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5',
   'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
   'Accept-Language':"en-us,en;q=0.5",
   'Accept-Encoding':"gzip,deflate",
@@ -158,9 +159,6 @@
 # Tags and attributes to check in the http test.
 # The general idea is to grab tags with attributes known
 # to either hold script, or cause automatic network activity
-# See: http://www.w3.org/TR/REC-html40/index/attributes.html
-# http://www.w3.org/TR/REC-html40/index/elements.html  
-# and http://ha.ckers.org/xss.html
 # Note: the more we add, the greater the potential for false positives...  
 # We also only care about the ones that work for FF2/FF3. 
 
@@ -170,24 +168,55 @@
 # Unfortunately, Google munges its javascript, so we have to do
 # some more advanced processing to reach that goal..
 # Also, I'm somewhat torn on dropping 'a' tags..
-tags_to_check = ['a', 'applet', 'area', 'base', 'body', 'embed', 'form',
+tags_to_check = ['a', 'applet', 'area', 'base', 'embed', 'form',
                  'frame', 'iframe', 'img', 'input', 'link', 'meta', 
-                 'object', 'script', 'style']
+                 'object', 'script', 'style', 'layer', 'ilayer']
 tags_preserve_inner = ['script','style'] 
-attrs_to_check =  ['background', 'cite', 'classid', 'codebase', 'data', 
-                   'longdesc', 'onblur', 
-                   'onchange', 'onclick', 'ondblclick', 'onfocus', 'onkeydown', 
-                   'onkeypress', 'onkeyup','onload', 'onmousedown', 'onmousemove', 
-                   'onmouseout', 'onmouseover','onmouseup', 'onreset', 'onselect', 
-                   'onsubmit', 'onunload', 'profile', 'src', 'usemap']
+
+# Merged from:
+# http://www.w3.org/TR/REC-html40/index/attributes.html
+# http://www.w3.org/TR/REC-html40/index/elements.html  
+# http://web.archive.org/web/20060113072810/www.mozilla.org/docs/dom/domref/dom_event_ref33.html
+# http://scrivna.com/blog/2008/09/18/php-xss-filtering-function/
+# https://svn.typo3.org/TYPO3v4/Core/trunk/typo3/contrib/RemoveXSS/RemoveXSS.php
+# http://www.expertzzz.com/Downloadz/view/3424
+# http://kallahar.com/smallprojects/php_xss_filter_function.php
+# and http://ha.ckers.org/xss.html
+attrs_to_check = ['background', 'cite', 'classid', 'codebase', 'data',
+'longdesc', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate',
+'onattrmodified', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut',
+'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint',
+'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'onbroadcast',
+'oncellchange', 'onchange', 'oncharacterdatamodified', 'onclick', 'onclose',
+'oncommand', 'oncommandupdate', 'oncontextmenu', 'oncontrolselect', 'oncopy',
+'oncut', 'ondataavaible', 'ondataavailable', 'ondatasetchanged',
+'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragdrop',
+'ondragend', 'ondragenter', 'ondragexit', 'ondraggesture', 'ondragleave',
+'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate',
+'onfilterchange', 'onfilterupdate', 'onfinish', 'onfocus', 'onfocusin',
+'onfocusout', 'onhelp', 'oninput', 'onkeydown', 'onkeypress', 'onkeyup',
+'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter',
+'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup',
+'onmousewheel', 'onmove', 'onmoveend', 'onmoveout', 'onmovestart',
+'onnodeinserted', 'onnodeinsertedintodocument', 'onnoderemoved',
+'onnoderemovedfromdocument', 'onoverflowchanged', 'onpaint', 'onpaste',
+'onpopupHidden', 'onpopupHiding', 'onpopupShowing', 'onpopupShown',
+'onpropertychange', 'onreadystatechange', 'onreset', 'onresize',
+'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete',
+'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange',
+'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onsubtreemodified',
+'ontext', 'onunderflow', 'onunload', 'overflow', 'profile', 'src', 'style',
+'usemap']
+attrs_to_check_map = {}
+for a in attrs_to_check: attrs_to_check_map[a]=1
 attrs_to_prune = ['alt', 'label', 'prompt', 'standby', 'summary', 'title',
                   'abbr']
 
-
+# For recursive fetching of urls:
 tags_to_recurse = ['a', 'applet', 'embed', 'frame', 'iframe', #'img',
-                   'link', 'object', 'script'] 
-recurse_html = ['frame', 'iframe']
-attrs_to_recurse = ['background', 'classid', 'codebase', 'data', 'href',
+                   'link', 'object', 'script', 'layer', 'ilayer'] 
+recurse_html = ['frame', 'iframe', 'layer', 'ilayer']
+attrs_to_recurse = ['background', 'codebase', 'data', 'href',
                     'pluginurl', 'src']
 
 #
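
attrs_to_check_map above is the classic pre-set() idiom for constant-time
membership tests; the per-tag check later in this commit does one dict lookup
per attribute instead of scanning the hundred-plus entry list. A hypothetical
micro-example:

    lookup = {}
    for a in ['onclick', 'onload', 'src']: lookup[a] = 1
    print 'onclick' in lookup   # True: O(1) dict membership
    print 'alt' in lookup       # False: not scanned, just hashed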
@@ -237,6 +266,7 @@
     traceback.print_exc()
     return (0, "")
 
+  # TODO: Consider also returning mime type here
   return (reply.code, content)
 
 class Test:
@@ -379,12 +409,10 @@
   def __init__(self, mt, wordlist, filetypes=scan_filetypes):
     SearchBasedTest.__init__(self, mt, "HTTP", 80, wordlist)
     self.fetch_targets = 5
-    self.three_way_fails = {}
     self.httpcode_fails = {}
-    self.two_way_fails = {}
+    self.exit_fails = {}
     self.successes = {}
-    self.three_way_limit = 10
-    self.two_way_limit = 100
+    self.exit_limit = 100
     self.httpcode_limit = 100
     self.scan_filetypes = filetypes
     self.results = []
@@ -446,28 +474,28 @@
  
   def remove_target(self, address):
     SearchBasedTest.remove_target(self, address)
-    del self.httpcode_limit[address]
-    del self.three_way_limit[address]
-    del self.successes[address]
-    del self.two_way_limit[address]
+    if address in self.httpcode_fails: del self.httpcode_fails[address]
+    if address in self.successes: del self.successes[address]
+    if address in self.exit_fails: del self.exit_fails[address]
     kill_results = []
     for r in self.results:
       if r.site == address:
         kill_results.append(r)
     for r in kill_results:
+      # XXX: Move files instead of removing them..
       #r.remove_files()
       self.results.remove(r)
     
   def register_exit_failure(self, address, exit_node):
-    if address in self.two_way_fails:
-      self.two_way_fails[address].add(exit_node)
+    if address in self.exit_fails:
+      self.exit_fails[address].add(exit_node)
     else:
-      self.two_way_fails[address] = sets.Set([exit_node])
+      self.exit_fails[address] = sets.Set([exit_node])
 
     # TODO: Do something if abundance of successful tests?
     # Problem is this can still trigger for localized content
-    err_cnt = len(self.two_way_fails[address])
-    if err_cnt > self.two_way_limit:
+    err_cnt = len(self.exit_fails[address])
+    if err_cnt > self.exit_limit:
       if address not in self.successes: self.successes[address] = 0
       plog("NOTICE", "Excessive HTTP 2-way failure ("+str(err_cnt)+" vs "+str(self.successes[address])+") for "+address+". Removing.")
   
@@ -493,23 +521,6 @@
     else:
       plog("ERROR", self.proto+" http error code failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
     
-  def register_dynamic_failure(self, address, exit_node):
-    if address in self.three_way_fails:
-      self.three_way_fails[address].add(exit_node)
-    else:
-      self.three_way_fails[address] = sets.Set([exit_node])
-    
-    err_cnt = len(self.three_way_fails[address])
-    if err_cnt > self.three_way_limit:
-      # Remove all associated data for this url.
-      # (Note, this also seems to imply we should report BadExit in bulk,
-      # after we've had a chance for these false positives to be weeded out)
-      if address not in self.successes: self.successes[address] = 0
-      plog("NOTICE", "Excessive HTTP 3-way failure ("+str(err_cnt)+" vs "+str(self.successes[address])+") for "+address+". Removing.")
-
-      self.remove_target(address)
-    else:
-      plog("ERROR", self.proto+" 3-way failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
  
   def check_http(self, address):
     ''' check whether a http connection to a given address is molested '''
@@ -648,7 +659,7 @@
     exit_content_file.close()
 
     result = HttpTestResult(exit_node, address, TEST_FAILURE, 
-                            FAILURE_DYNAMICTAGS, sha1sum_new.hexdigest(), 
+                            FAILURE_DYNAMICBINARY, sha1sum_new.hexdigest(), 
                             psha1sum.hexdigest(), new_content_file.name,
                             exit_content_file.name, 
                             content_prefix+'.content-old',
@@ -656,7 +667,9 @@
     self.results.append(result)
     self.datahandler.saveResult(result)
 
-    self.register_dynamic_failure(address, exit_node)
+    # The HTTP Test should remove address immediately.
+    plog("NOTICE", "HTTP Test is removing dynamic URL "+address)
+    self.remove_target(address)
     return TEST_FAILURE
 
 class HTMLTest(HTTPTest):
@@ -667,6 +680,8 @@
     self.min_targets = 9
     self.recurse_filetypes = recurse_filetypes
     self.fetch_queue = Queue.Queue()
+    self.dynamic_fails = {}
+    self.dynamic_limit = 10
  
   def run_test(self):
     # A single test should have a single cookie jar
@@ -698,6 +713,28 @@
   def get_targets(self):
     return self.get_search_urls('http', self.fetch_targets) 
 
+  def remove_target(self, address):
+    HTTPTest.remove_target(self, address)
+    if address in self.dynamic_fails: del self.dynamic_fails[address]
+
+  def register_dynamic_failure(self, address, exit_node):
+    if address in self.dynamic_fails:
+      self.dynamic_fails[address].add(exit_node)
+    else:
+      self.dynamic_fails[address] = sets.Set([exit_node])
+    
+    err_cnt = len(self.dynamic_fails[address])
+    if err_cnt > self.dynamic_limit:
+      # Remove all associated data for this url.
+      # (Note, this also seems to imply we should report BadExit in bulk,
+      # after we've had a chance for these false positives to be weeded out)
+      if address not in self.successes: self.successes[address] = 0
+      plog("NOTICE", "Excessive HTTP 3-way failure ("+str(err_cnt)+" vs "+str(self.successes[address])+") for "+address+". Removing.")
+
+      self.remove_target(address)
+    else:
+      plog("ERROR", self.proto+" 3-way failure at "+exit_node+". This makes "+str(err_cnt)+" node failures for "+address)
+
   def _add_recursive_targets(self, soup, orig_addr):
     # XXX: Watch for spider-traps! (ie mutually sourcing iframes)
     # Only pull at most one filetype from the list of 'a' links
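
The bookkeeping behind exit_fails and dynamic_fails above follows one
pattern: distinct failing exits accumulate per URL in a Set, and the URL is
dropped once the count passes a limit. A hypothetical standalone
distillation:

    import sets

    class FailureTracker:
        def __init__(self, limit):    # e.g. exit_limit=100, dynamic_limit=10
            self.limit = limit
            self.fails = {}           # address -> Set of failing exit ids

        def register(self, address, exit_node):
            if address in self.fails:
                self.fails[address].add(exit_node)
            else:
                self.fails[address] = sets.Set([exit_node])
            # True once more distinct exits have failed than the limit
            # allows -- the caller's cue to remove_target(address)
            return len(self.fails[address]) > self.limit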
@@ -732,7 +769,7 @@
     if str(tag.name) in tags_to_check:
       return False
     for attr in tag.attrs:
-      if attr[0] in attrs_to_check:
+      if attr[0] in attrs_to_check_map:
         return False
     return True
  
@@ -760,7 +797,7 @@
     for tag in to_extract:
       tag.extract()
     return soup      
- 
+
   def check_html(self, address):
     ''' check whether a http connection to a given address is molested '''
     plog('INFO', 'Conducting an html test with destination ' + address)
@@ -925,33 +962,55 @@
       else: self.successes[address]=1
       return TEST_SUCCESS
 
-    # TODO: Can we create some kind of diff/masking filter
-    # between the two non-Tor soups, and apply it to the
-    # Tor soup, to see if anything additional has changed?
-    # http://bramcohen.livejournal.com/37690.html
-    #  -> patiencediff.py vs difflib
-    #     "For small files difflib wins". And it's standard. Yay!
-    tor_v_new = difflib.SequenceMatcher(lambda x: x == " ", str(psoup), str(soup_new))
-    tor_v_orig = difflib.SequenceMatcher(lambda x: x == " ", str(psoup), str(soup))
-    orig_v_new = difflib.SequenceMatcher(lambda x: x == " ", str(soup), str(soup_new))
+    # Let's try getting just the tag differences
+    # 1. Take difference between old and new tags both ways
+    # 2. Make map of tags that change to their attributes
+    # 3. Compare list of changed tags for tor vs new and
+    #    see if any extra tags changed or if new attributes
+    #    were added to additional tags
+    old_vs_new = SoupDiffer(soup, soup_new)
+    new_vs_old = SoupDiffer(soup_new, soup)
+    new_vs_tor = SoupDiffer(soup_new, psoup)
 
-    # The key property is that the differences between the two non-tor fetches
-    # match the differences between the Tor and the regular fetches 
+    changed_tags = {}
+    # I'm an evil man and I'm going to CPU hell..
+    for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
+      for t in tags.findAll():
+        if t.name not in changed_tags:
+          changed_tags[t.name] = sets.Set([])
+        for attr in t.attrs:
+          changed_tags[t.name].add(attr[0])
+    for tags in map(BeautifulSoup, new_vs_old.changed_tags()):
+      for t in tags.findAll():
+        if t.name not in changed_tags:
+          changed_tags[t.name] = sets.Set([])
+        for attr in t.attrs:
+          changed_tags[t.name].add(attr[0])
+    
+    changed_content = bool(old_vs_new.changed_content() or new_vs_old.changed_content())
 
-    plog("NOTICE", "Diffing charcateristics: "+str((orig_v_new.get_opcodes()==tor_v_orig.get_opcodes(),
-             orig_v_new.get_matching_blocks()==tor_v_orig.get_matching_blocks(),
-             orig_v_new.get_opcodes()==tor_v_new.get_opcodes(),
-             orig_v_new.get_matching_blocks()==tor_v_new.get_matching_blocks())))
+    false_positive = True 
+    for tags in map(BeautifulSoup, new_vs_tor.changed_tags()):
+      for t in tags.findAll():
+        if t.name not in changed_tags:
+          false_positive = False
+        else:
+          for attr in t.attrs:
+            if attr[0] not in changed_tags[t.name]:
+              false_positive = False
 
-    diff_file = open(failed_prefix+'.diffs.'+exit_node[1:],'w')
-    diff_file.write("orig_v_new.get_matching_blocks() =\n\t"+str(orig_v_new.get_matching_blocks())+"\n")
-    diff_file.write("orig_v_new.get_opcodes() =\n\t"+str(orig_v_new.get_opcodes())+"\n\n")
-    diff_file.write("tor_v_new.get_matching_blocks() =\n\t"+str(tor_v_new.get_matching_blocks())+"\n")
-    diff_file.write("tor_v_new.get_opcodes() =\n\t"+str(tor_v_new.get_opcodes())+"\n\n")
-    diff_file.write("tor_v_orig.get_matching_blocks() =\n\t"+str(tor_v_orig.get_matching_blocks())+"\n")
-    diff_file.write("tor_v_orig.get_opcodes() =\n\t"+str(tor_v_orig.get_opcodes())+"\n\n")
-    diff_file.close()
+    if new_vs_tor.changed_content() and not changed_content:
+      false_positive = False
 
+    if false_positive:
+      plog("NOTICE", "False positive detected for dynamic change at "+address+" via "+exit_node)
+      result = HtmlTestResult(exit_node, address, TEST_SUCCESS)
+      self.results.append(result)
+      #self.datahandler.saveResult(result)
+      if address in self.successes: self.successes[address]+=1
+      else: self.successes[address]=1
+      return TEST_SUCCESS
+
     # XXX: Check for existence of this file before overwriting
     exit_tag_file = open(failed_prefix+'.dyn-tags.'+exit_node[1:],'w')
     exit_tag_file.write(str(psoup))

Modified: torflow/trunk/NetworkScanners/soatstats.py
===================================================================
--- torflow/trunk/NetworkScanners/soatstats.py	2009-01-29 13:17:43 UTC (rev 18317)
+++ torflow/trunk/NetworkScanners/soatstats.py	2009-01-29 14:22:47 UTC (rev 18318)
@@ -96,6 +96,54 @@
       if node.counts[test].inconclusive != 0:
         print `node.idhex` + "\t" + `node.counts[test].inconclusive`
 
+
+  # False positive test left in for verification and tweaking
+  # TODO: Remove this bit eventually
+  for result in data:
+    if result.__class__.__name__ == "HtmlTestResult":
+      if not result.tags_old or not result.tags or not result.exit_tags:
+        continue
+      new_vs_old = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()),
+                              BeautifulSoup(open(result.tags_old, "r").read()))
+      old_vs_new = SoupDiffer(BeautifulSoup(open(result.tags_old, "r").read()),
+                              BeautifulSoup(open(result.tags, "r").read()))
+      new_vs_tor = SoupDiffer(BeautifulSoup(open(result.tags, "r").read()),
+                              BeautifulSoup(open(result.exit_tags, "r").read()))
+      changed_tags = {}
+      # I'm an evil man and I'm going to CPU hell..
+      for tags in map(BeautifulSoup, old_vs_new.changed_tags()):
+        for t in tags.findAll():
+          if t.name not in changed_tags:
+            changed_tags[t.name] = sets.Set([])
+          for attr in t.attrs:
+            changed_tags[t.name].add(attr[0])
+      for tags in map(BeautifulSoup, new_vs_old.changed_tags()):
+        for t in tags.findAll():
+          if t.name not in changed_tags:
+            changed_tags[t.name] = sets.Set([])
+          for attr in t.attrs:
+            changed_tags[t.name].add(attr[0])
+      
+      changed_content = bool(old_vs_new.changed_content() or new_vs_old.changed_content())
+  
+      false_positive = True 
+      for tags in map(BeautifulSoup, new_vs_tor.changed_tags()):
+        for t in tags.findAll():
+          if t.name not in changed_tags:
+            false_positive = False
+          else:
+            for attr in t.attrs:
+              if attr[0] not in changed_tags[t.name]:
+                false_positive = False
+  
+      if new_vs_tor.changed_content() and not changed_content:
+        false_positive = False
+
+      print false_positive      
+
   print ""
 
 if __name__ == "__main__":