[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [bridgedb/develop] Minor cleanup for htmlify_string() from commits faf48983 and ccb3b8d1.
commit 0b62526335ed532fee5b2b1b3cba4ea04424333a
Author: Isis Lovecruft <isis@xxxxxxxxxxxxxx>
Date: Thu Jun 4 02:58:01 2015 +0000
Minor cleanup for htmlify_string() from commits faf48983 and ccb3b8d1.
First, we don't need the bridgedb.util.htmlify_string() function, since
Mako provides syntax for "piping" content about to be written to the
page through filters [0] and it even provides a HTML-escaping function
(by default, it uses markupsafe.escape).
Second, we do actually need to remove control characters (at the very
least, but since technically we're parsing unicodeâ?¦ removing all
escape/control characters isn't actually feasible) from bridge lines.
* REMOVE bridgedb.util.htmlify_string().
* CHANGE lib/bridgedb/templates/bridges.html to use Mako's builtin text
filtering syntax and functions for replacing HTML token characters
with their corresponding HTML special characters.
* ADD bridgedb.util.replaceControlChars().
* ADD unittests to ensure that everything is escaped properly.
[0]: http://docs.makotemplates.org/en/latest/filtering.html
---
lib/bridgedb/HTTPServer.py | 12 ++--
lib/bridgedb/templates/bridges.html | 8 ++-
lib/bridgedb/test/test_HTTPServer.py | 109 ++++++++++++++++++++++++++++++++--
lib/bridgedb/util.py | 50 +++++++++-------
4 files changed, 145 insertions(+), 34 deletions(-)
diff --git a/lib/bridgedb/HTTPServer.py b/lib/bridgedb/HTTPServer.py
index c969908..beff4bd 100644
--- a/lib/bridgedb/HTTPServer.py
+++ b/lib/bridgedb/HTTPServer.py
@@ -56,7 +56,7 @@ from bridgedb.qrcodes import generateQR
from bridgedb.safelog import logSafely
from bridgedb.schedule import Unscheduled
from bridgedb.schedule import ScheduledInterval
-from bridgedb.util import htmlify_string
+from bridgedb.util import replaceControlChars
TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), 'templates')
@@ -739,12 +739,11 @@ class WebResourceBridges(resource.Resource):
self.nBridgesToGive,
countryCode,
bridgeFilterRules=rules)
- bridgeLines = "".join("%s\n" % b.getConfigLine(
+ bridgeLines = [replaceControlChars(b.getConfigLine(
includeFingerprint=self.includeFingerprints,
addressClass=addressClass,
transport=transport,
- request=bridgedb.Dist.uniformMap(ip)
- ) for b in bridges)
+ request=bridgedb.Dist.uniformMap(ip))) for b in bridges]
answer = self.renderAnswer(request, bridgeLines, format)
return answer
@@ -772,7 +771,7 @@ class WebResourceBridges(resource.Resource):
if format == 'plain':
request.setHeader("Content-Type", "text/plain")
try:
- rendered = bytes(bridgeLines)
+ rendered = bytes('\n'.join(bridgeLines))
except Exception as err:
rendered = replaceErrorPage(err)
else:
@@ -790,8 +789,7 @@ class WebResourceBridges(resource.Resource):
rtl=rtl,
lang=langs[0],
answer=bridgeLines,
- qrcode=qrcode,
- htmlify_string=htmlify_string)
+ qrcode=qrcode)
except Exception as err:
rendered = replaceErrorPage(err)
diff --git a/lib/bridgedb/templates/bridges.html b/lib/bridgedb/templates/bridges.html
index 0ecf1d4..b5ac544 100644
--- a/lib/bridgedb/templates/bridges.html
+++ b/lib/bridgedb/templates/bridges.html
@@ -1,8 +1,7 @@
## -*- coding: utf-8 -*-
<%inherit file="base.html"/>
-<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0,
- htmlify_string=None, **kwargs"/>
+<%page args="strings, rtl=False, lang='en', answer=0, qrcode=0, **kwargs"/>
</div>
</div>
@@ -67,7 +66,10 @@
<div class="row" id="bridgesrow1">
<div class="col col-lg-12">
<div class="bridge-lines" id="bridgelines">
-${htmlify_string(answer)}
+## See http://docs.makotemplates.org/en/latest/filtering.html
+% for bridgeline in answer:
+${bridgeline | h,trim}<br />
+% endfor
</div>
</div>
</div>
diff --git a/lib/bridgedb/test/test_HTTPServer.py b/lib/bridgedb/test/test_HTTPServer.py
index 68616bf..d597c55 100644
--- a/lib/bridgedb/test/test_HTTPServer.py
+++ b/lib/bridgedb/test/test_HTTPServer.py
@@ -422,6 +422,8 @@ class DummyBridge(object):
"""A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked
``getConfigLine`` method."""
+ ptArgs = {}
+
def _randORPort(self): return random.randint(9001, 9999)
def _randPTPort(self): return random.randint(6001, 6666)
def _returnFour(self): return random.randint(2**24, 2**32-1)
@@ -457,16 +459,32 @@ class DummyBridge(object):
line.append(str(transport))
line.append("%s:%s" % (self.ip, self.orport))
if includeFingerprint is True: line.append(self.fingerprint)
+ if self.ptArgs:
+ line.append(','.join(['='.join(x) for x in self.ptArgs.items()]))
bridgeLine = " ".join([item for item in line])
#print "Created config line: %r" % bridgeLine
return bridgeLine
+class DummyMaliciousBridge(DummyBridge):
+ """A mock :class:`bridgedb.Bridges.Bridge` which only supports a mocked
+ ``getConfigLine`` method and which maliciously insert an additional fake
+ bridgeline and some javascript into its PT arguments.
+ """
+ ptArgs = {
+ "eww": "\rBridge 1.2.3.4:1234",
+ "bad": "\nBridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567",
+ "evil": "<script>alert('fuuuu');</script>",
+ }
+
+
class DummyIPBasedDistributor(object):
"""A mocked :class:`bridgedb.Dist.IPBasedDistributor` which is used to test
:class:`bridgedb.HTTPServer.WebResourceBridges.
"""
+ _bridge_class = DummyBridge
+
def _dumbAreaMapper(ip): return ip
def __init__(self, areaMapper=None, nClusters=None, key=None,
@@ -486,7 +504,7 @@ class DummyIPBasedDistributor(object):
"""Needed because it's called in
:meth:`WebResourceBridges.getBridgesForIP`.
"""
- return [DummyBridge() for _ in xrange(N)]
+ return [self._bridge_class() for _ in xrange(N)]
class DummyRequest(requesthelper.DummyRequest):
@@ -520,11 +538,19 @@ class WebResourceBridgesTests(unittest.TestCase):
self.dist = DummyIPBasedDistributor()
self.sched = ScheduledInterval(1, 'hour')
self.nBridgesPerRequest = 2
+
+ def useBenignBridges(self):
+ self.dist._bridge_class = DummyBridge
self.bridgesResource = HTTPServer.WebResourceBridges(
- self.dist, self.sched, N=2,
- #useForwardedHeader=True,
+ self.dist, self.sched, N=self.nBridgesPerRequest,
includeFingerprints=True)
+ self.root.putChild(self.pagename, self.bridgesResource)
+ def useMaliciousBridges(self):
+ self.dist._bridge_class = DummyMaliciousBridge
+ self.bridgesResource = HTTPServer.WebResourceBridges(
+ self.dist, self.sched, N=self.nBridgesPerRequest,
+ includeFingerprints=True)
self.root.putChild(self.pagename, self.bridgesResource)
def parseBridgesFromHTMLPage(self, page):
@@ -550,8 +576,76 @@ class WebResourceBridgesTests(unittest.TestCase):
return bridges
+ def test_render_GET_malicious_newlines(self):
+ """Test rendering a request when the some of the bridges returned have
+ malicious (HTML, Javascript, etc., in their) PT arguments.
+ """
+ self.useMaliciousBridges()
+
+ request = DummyRequest([self.pagename])
+ request.method = b'GET'
+ request.getClientIP = lambda: '1.1.1.1'
+
+ page = self.bridgesResource.render(request)
+ self.assertTrue(
+ 'bad=Bridge 6.6.6.6:6666 0123456789abcdef0123456789abcdef01234567' in str(page),
+ "Newlines in bridge lines should be removed.")
+
+ def test_render_GET_malicious_returnchar(self):
+ """Test rendering a request when the some of the bridges returned have
+ malicious (HTML, Javascript, etc., in their) PT arguments.
+ """
+ self.useMaliciousBridges()
+
+ request = DummyRequest([self.pagename])
+ request.method = b'GET'
+ request.getClientIP = lambda: '1.1.1.1'
+
+ page = self.bridgesResource.render(request)
+ self.assertTrue(
+ 'eww=Bridge 1.2.3.4:1234' in str(page),
+ "Return characters in bridge lines should be removed.")
+
+ def test_render_GET_malicious_javascript(self):
+ """Test rendering a request when the some of the bridges returned have
+ malicious (HTML, Javascript, etc., in their) PT arguments.
+ """
+ self.useMaliciousBridges()
+
+ request = DummyRequest([self.pagename])
+ request.method = b'GET'
+ request.getClientIP = lambda: '1.1.1.1'
+
+ page = self.bridgesResource.render(request)
+ self.assertTrue(
+ "evil=<script>alert('fuuuu');</script>" in str(page),
+ ("The characters &, <, >, ', and \" in bridge lines should be "
+ "replaced with their corresponding HTML special characters."))
+
+ def test_renderAnswer_GET_textplain_malicious(self):
+ """If the request format specifies 'plain', we should return content
+ with mimetype 'text/plain' and ASCII control characters replaced.
+ """
+ self.useMaliciousBridges()
+
+ request = DummyRequest([self.pagename])
+ request.args.update({'format': ['plain']})
+ request.getClientIP = lambda: '4.4.4.4'
+ request.method = b'GET'
+
+ page = self.bridgesResource.render(request)
+ self.assertTrue("html" not in str(page))
+ self.assertTrue(
+ 'eww=Bridge 1.2.3.4:1234' in str(page),
+ "Return characters in bridge lines should be removed.")
+ self.assertTrue(
+ 'bad=Bridge 6.6.6.6:6666' in str(page),
+ "Newlines in bridge lines should be removed.")
+
def test_render_GET_vanilla(self):
"""Test rendering a request for normal, vanilla bridges."""
+ self.useBenignBridges()
+
request = DummyRequest([self.pagename])
request.method = b'GET'
request.getClientIP = lambda: '1.1.1.1'
@@ -577,6 +671,8 @@ class WebResourceBridgesTests(unittest.TestCase):
"""The client's IP address should be obtainable from the
'X-Forwarded-For' header in the request.
"""
+ self.useBenignBridges()
+
self.bridgesResource.useForwardedHeader = True
request = DummyRequest([self.pagename])
request.method = b'GET'
@@ -594,6 +690,8 @@ class WebResourceBridgesTests(unittest.TestCase):
def test_render_GET_RTLlang(self):
"""Test rendering a request for plain bridges in Arabic."""
+ self.useBenignBridges()
+
request = DummyRequest([b"bridges?transport=obfs3"])
request.method = b'GET'
request.getClientIP = lambda: '3.3.3.3'
@@ -614,6 +712,8 @@ class WebResourceBridgesTests(unittest.TestCase):
def test_render_GET_RTLlang_obfs3(self):
"""Test rendering a request for obfs3 bridges in Farsi."""
+ self.useBenignBridges()
+
request = DummyRequest([b"bridges?transport=obfs3"])
request.method = b'GET'
request.getClientIP = lambda: '3.3.3.3'
@@ -643,11 +743,12 @@ class WebResourceBridgesTests(unittest.TestCase):
self.assertGreater(int(port), 0)
self.assertLessEqual(int(port), 65535)
-
def test_renderAnswer_textplain(self):
"""If the request format specifies 'plain', we should return content
with mimetype 'text/plain'.
"""
+ self.useBenignBridges()
+
request = DummyRequest([self.pagename])
request.args.update({'format': ['plain']})
request.getClientIP = lambda: '4.4.4.4'
diff --git a/lib/bridgedb/util.py b/lib/bridgedb/util.py
index 580f31f..7f906d7 100644
--- a/lib/bridgedb/util.py
+++ b/lib/bridgedb/util.py
@@ -180,26 +180,6 @@ def levenshteinDistance(s1, s2, len1=None, len2=None,
memo[key] = distance
return distance
-htmlify_string_map = {
- '<': '<',
- '>': '>',
- '&': '&',
- '"': '"',
- "'": ''',
- '\n': '<br/>'
- }
-def htmlify_string(s):
- """Encode HTML special characters, and newlines, in s.
-
- >>> htmlify_string('<script>alert("badthink");</script>')
- '<script>alert("badthink");</script>'
- >>> htmlify_string('bridge 1\nbridge 2')
- 'bridge 1<br/>bridge 2'
-
- :param str s: The string to encode.
- """
- return ''.join(map((lambda ch: htmlify_string_map.get(ch, ch)), s))
-
def isascii(s):
"""Return True if there are no non-ASCII characters in s, False otherwise.
@@ -235,6 +215,36 @@ def isascii_noncontrol(s):
"""
return all(map((lambda ch: 32 <= ord(ch) < 127), s))
+def replaceControlChars(text, replacement=None, encoding="utf-8"):
+ """Remove ASCII control characters [0-31, 92, 127].
+
+ >>> replaceControlChars('foo\n bar\\ baz\r \t\0quux\n')
+ 'foo bar baz quux'
+ >>> replaceControlChars("\bI wonder if I'm outside the quotes now")
+ "I wonder if I'm outside the quotes now"
+
+ :param str text: Some text to remove ASCII control characters from.
+ :param int replacement: If given, the **replacement** should be an integer
+ representing the decimal representation of the byte to replace
+ occurences of ASCII control characters with. For example, if they
+ should be replaced with the character ``'a'``, then ``97`` should be
+ used as the **replacement**, because ``ord('a') == 97``.
+ :param str encoding: The encoding of the **text**.
+ :rtype: str
+ :returns: The sanitized **text**.
+ """
+ escaped = bytearray()
+
+ for byte in bytearray(text, encoding):
+ if byte in range(0, 32) + [92, 127]:
+ if replacement:
+ byte = replacement
+ else:
+ continue
+ escaped += bytearray([byte])
+
+ return str(escaped)
+
class JustifiedLogFormatter(logging.Formatter):
"""A logging formatter which pretty prints thread and calling function
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits