[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[ooni-probe/master] Use the same logic for encoding header values and keys as measurement… (#739)
commit 582f85501a072dfd8f2f6ebbdb28dd615a8db325
Author: Arturo Filastò <arturo@xxxxxxxxxxx>
Date: Tue Apr 4 16:48:07 2017 +0000
Use the same logic for encoding header values and keys as measurement… (#739)
* Use the same logic for encoding header values and keys as measurement-kit
* Fix edge case that led us to not do stripping of response headers
when response body was null.
* Bump all the tests version that depend on httpt test template
This will allow us to distinguish when the fix for the body length is
rolled out
---
ooni/common/http_utils.py | 30 +++-------------------
ooni/nettests/blocking/facebook_messenger.py | 2 +-
ooni/nettests/blocking/http_requests.py | 2 +-
ooni/nettests/blocking/meek_fronted_requests.py | 2 +-
ooni/nettests/blocking/web_connectivity.py | 2 +-
ooni/nettests/blocking/whatsapp.py | 2 +-
ooni/nettests/experimental/domclass_collector.py | 2 +-
.../experimental/http_keyword_filtering.py | 2 +-
.../experimental/http_uk_mobile_networks.py | 1 +
ooni/nettests/experimental/squid.py | 2 +-
ooni/nettests/manipulation/captiveportal.py | 2 +-
.../manipulation/http_header_field_manipulation.py | 2 +-
ooni/nettests/manipulation/http_host.py | 2 +-
ooni/nettests/scanning/http_url_list.py | 2 +-
ooni/nettests/third_party/psiphon.py | 2 +-
ooni/templates/httpt.py | 24 +++++++++++------
ooni/tests/test_common.py | 22 +++++-----------
17 files changed, 39 insertions(+), 64 deletions(-)
diff --git a/ooni/common/http_utils.py b/ooni/common/http_utils.py
index 57c3a15c..b44f5c0b 100644
--- a/ooni/common/http_utils.py
+++ b/ooni/common/http_utils.py
@@ -1,37 +1,13 @@
import re
-import codecs
from base64 import b64encode
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>!;]+)')
def representBody(body):
if not body:
return body
- # XXX perhaps add support for decoding gzip in the future.
- body = body.replace('\0', '')
- decoded = False
- charsets = ['ascii', 'utf-8']
-
- # If we are able to detect the charset of body from the meta tag
- # try to decode using that one first
- charset = META_CHARSET_REGEXP.search(body, re.IGNORECASE)
- if charset:
- try:
- encoding = charset.group(1).lower()
- codecs.lookup(encoding)
- charsets.insert(0, encoding)
- except (LookupError, IndexError):
- # Skip invalid codecs and partial regexp match
- pass
-
- for encoding in charsets:
- try:
- body = unicode(body, encoding)
- decoded = True
- break
- except UnicodeDecodeError:
- pass
- if not decoded:
+ try:
+ body = unicode(body, 'utf-8')
+ except UnicodeDecodeError:
body = {
'data': b64encode(body),
'format': 'base64'
diff --git a/ooni/nettests/blocking/facebook_messenger.py b/ooni/nettests/blocking/facebook_messenger.py
index 651fca0d..9d711fd8 100644
--- a/ooni/nettests/blocking/facebook_messenger.py
+++ b/ooni/nettests/blocking/facebook_messenger.py
@@ -46,7 +46,7 @@ class FacebookMessengerTest(httpt.HTTPTest, dnst.DNSTest):
description = ("This test examines the reachability of Facebook "
"Messenger in your network.")
author = "Arturo Filastò"
- version = "0.4.0"
+ version = "0.5.0"
requiresRoot = False
requiresTor = False
diff --git a/ooni/nettests/blocking/http_requests.py b/ooni/nettests/blocking/http_requests.py
index 6f0276be..483f92ef 100644
--- a/ooni/nettests/blocking/http_requests.py
+++ b/ooni/nettests/blocking/http_requests.py
@@ -40,7 +40,7 @@ class HTTPRequestsTest(httpt.HTTPTest):
description = ("Performs a HTTP GET request over Tor and one over the "
"local network and compares the two results.")
author = "Arturo Filastò"
- version = "0.2.5"
+ version = "0.3.0"
usageOptions = UsageOptions
diff --git a/ooni/nettests/blocking/meek_fronted_requests.py b/ooni/nettests/blocking/meek_fronted_requests.py
index 5918cd4b..f516f84e 100644
--- a/ooni/nettests/blocking/meek_fronted_requests.py
+++ b/ooni/nettests/blocking/meek_fronted_requests.py
@@ -30,7 +30,7 @@ class meekTest(httpt.HTTPTest):
name = "Meek fronted requests test"
description = "This test examines whether the domains used by Meek "\
"(a type of Tor bridge) work in your network."
- version = "0.0.1"
+ version = "0.1.0"
usageOptions = UsageOptions
inputFile = ['file', 'f', None,
diff --git a/ooni/nettests/blocking/web_connectivity.py b/ooni/nettests/blocking/web_connectivity.py
index e5085380..600a9e4e 100644
--- a/ooni/nettests/blocking/web_connectivity.py
+++ b/ooni/nettests/blocking/web_connectivity.py
@@ -48,7 +48,7 @@ class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
"connect to the resolved IPs and then fetching the page "
"and comparing all these results with those of a control.")
author = "Arturo Filastò"
- version = "0.2.0"
+ version = "0.3.0"
contentDecoders = [('gzip', GzipDecoder)]
diff --git a/ooni/nettests/blocking/whatsapp.py b/ooni/nettests/blocking/whatsapp.py
index 6957e620..b7a670b4 100644
--- a/ooni/nettests/blocking/whatsapp.py
+++ b/ooni/nettests/blocking/whatsapp.py
@@ -264,7 +264,7 @@ class WhatsappTest(httpt.HTTPTest, dnst.DNSTest):
description = ("This test examines the reachability of WhatsApp "
" and WhatsApp's web interface (web.whatsapp.com) in your network.")
author = "Arturo Filastò"
- version = "0.5.0"
+ version = "0.6.0"
requiresRoot = False
requiresTor = False
diff --git a/ooni/nettests/experimental/domclass_collector.py b/ooni/nettests/experimental/domclass_collector.py
index efd5dbc3..94dd1f66 100644
--- a/ooni/nettests/experimental/domclass_collector.py
+++ b/ooni/nettests/experimental/domclass_collector.py
@@ -13,7 +13,7 @@ from ooni.templates import httpt
class DOMClassCollector(httpt.HTTPTest):
name = "DOM class collector"
author = "Arturo Filastò"
- version = 0.1
+ version = "0.2.0"
followRedirects = True
diff --git a/ooni/nettests/experimental/http_keyword_filtering.py b/ooni/nettests/experimental/http_keyword_filtering.py
index cbf12d1e..865441e0 100644
--- a/ooni/nettests/experimental/http_keyword_filtering.py
+++ b/ooni/nettests/experimental/http_keyword_filtering.py
@@ -21,7 +21,7 @@ class HTTPKeywordFiltering(httpt.HTTPTest):
"""
name = "HTTP Keyword Filtering"
author = "Arturo Filastò"
- version = "0.1.1"
+ version = "0.2.0"
inputFile = ['file', 'f', None, 'List of keywords to use for censorship testing']
diff --git a/ooni/nettests/experimental/http_uk_mobile_networks.py b/ooni/nettests/experimental/http_uk_mobile_networks.py
index 2ac0bd92..bf9a2fec 100644
--- a/ooni/nettests/experimental/http_uk_mobile_networks.py
+++ b/ooni/nettests/experimental/http_uk_mobile_networks.py
@@ -25,6 +25,7 @@ class HTTPUKMobileNetworksTest(httpt.HTTPTest):
XXX port the knowledge from the trac ticket into this test docstring
"""
name = "HTTP UK mobile network redirect test"
+ version = "0.1.0"
usageOptions = UsageOptions
diff --git a/ooni/nettests/experimental/squid.py b/ooni/nettests/experimental/squid.py
index cf976ba6..4e44091b 100644
--- a/ooni/nettests/experimental/squid.py
+++ b/ooni/nettests/experimental/squid.py
@@ -18,7 +18,7 @@ class SquidTest(httpt.HTTPTest):
"""
name = "Squid test"
author = "Arturo Filastò"
- version = "0.1"
+ version = "0.2.0"
optParameters = [['backend', 'b', 'http://ooni.nu/test/', 'Test backend to use']]
diff --git a/ooni/nettests/manipulation/captiveportal.py b/ooni/nettests/manipulation/captiveportal.py
index 844e4119..8cc945e5 100644
--- a/ooni/nettests/manipulation/captiveportal.py
+++ b/ooni/nettests/manipulation/captiveportal.py
@@ -62,7 +62,7 @@ class CaptivePortal(httpt.HTTPTest, dnst.DNSTest):
name = "captiveportal"
description = "Captive Portal Test."
- version = '0.3'
+ version = "0.4.0"
author = "Isis Lovecruft"
usageOptions = UsageOptions
requiresRoot = False
diff --git a/ooni/nettests/manipulation/http_header_field_manipulation.py b/ooni/nettests/manipulation/http_header_field_manipulation.py
index fcd5e0e1..a9c92d56 100644
--- a/ooni/nettests/manipulation/http_header_field_manipulation.py
+++ b/ooni/nettests/manipulation/http_header_field_manipulation.py
@@ -50,7 +50,7 @@ class HTTPHeaderFieldManipulation(httpt.HTTPTest):
description = "Checks if the HTTP request the server " \
"sees is the same as the one that the client has created."
author = "Arturo Filastò"
- version = "0.1.5"
+ version = "0.2.0"
randomizeUA = False
usageOptions = UsageOptions
diff --git a/ooni/nettests/manipulation/http_host.py b/ooni/nettests/manipulation/http_host.py
index 2e0a8e1a..40d8d355 100644
--- a/ooni/nettests/manipulation/http_host.py
+++ b/ooni/nettests/manipulation/http_host.py
@@ -42,7 +42,7 @@ class HTTPHost(httpt.HTTPTest):
description = "Tests a variety of different filter bypassing techniques "\
"based on the HTTP Host header field."
author = "Arturo Filastò"
- version = "0.2.4"
+ version = "0.3.0"
randomizeUA = False
usageOptions = UsageOptions
diff --git a/ooni/nettests/scanning/http_url_list.py b/ooni/nettests/scanning/http_url_list.py
index 8d268dfa..dedc7ad8 100644
--- a/ooni/nettests/scanning/http_url_list.py
+++ b/ooni/nettests/scanning/http_url_list.py
@@ -25,7 +25,7 @@ class HTTPURLList(httpt.HTTPTest):
"""
name = "HTTP URL List"
author = "Arturo Filastò"
- version = "0.1.3"
+ version = "0.2.0"
usageOptions = UsageOptions
diff --git a/ooni/nettests/third_party/psiphon.py b/ooni/nettests/third_party/psiphon.py
index f4b0033a..2821e0a8 100644
--- a/ooni/nettests/third_party/psiphon.py
+++ b/ooni/nettests/third_party/psiphon.py
@@ -34,7 +34,7 @@ class PsiphonTest(httpt.HTTPTest, process.ProcessTest):
description = ("Bootstraps Psiphon and "
"does a HTTP GET for the specified URL.")
author = "juga"
- version = "0.1.0"
+ version = "0.2.0"
timeout = 120
usageOptions = UsageOptions
diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index ab080c44..c6850384 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -167,19 +167,27 @@ class HTTPTest(NetTestCase):
getattr(response.request, 'absoluteURI', None)):
session['request']['url'] = response.request.absoluteURI
- if self.localOptions.get('withoutbody', 0) is 0:
- response_body = representBody(response_body)
- else:
- response_body = ''
+ response_headers = {}
+ for name, value in response.headers.getAllRawHeaders():
+ response_headers[name] = value[0]
- response_headers = _representHeaders(response.headers)
# Attempt to redact the IP address of the probe from the responses
- if (config.privacy.includeip is False and probe_ip.address is not None and
- (isinstance(response_body, str) or isinstance(response_body, unicode))):
- response_body = response_body.replace(probe_ip.address, "[REDACTED]")
+ if config.privacy.includeip is False and \
+ probe_ip.address is not None:
+ if isinstance(response_body, (str, unicode)):
+ response_body = response_body.replace(probe_ip.address, "[REDACTED]")
+
for key, value in response_headers.items():
response_headers[key] = value.replace(probe_ip.address,
"[REDACTED]")
+ for key, value in response_headers.items():
+ response_headers[key] = representBody(value)
+
+ if self.localOptions.get('withoutbody', 0) is 0:
+ response_body = representBody(response_body)
+ else:
+ response_body = ''
+
session['response'] = {
'headers': response_headers,
'body': response_body,
diff --git a/ooni/tests/test_common.py b/ooni/tests/test_common.py
index 40d3859f..c8437683 100644
--- a/ooni/tests/test_common.py
+++ b/ooni/tests/test_common.py
@@ -5,26 +5,16 @@ from twisted.web.client import readBody
from . import is_internet_connected
-from ooni.common.http_utils import META_CHARSET_REGEXP
+from ooni.common.http_utils import representBody
from ooni.common.ip_utils import is_public_ipv4_address, is_private_ipv4_address
from ooni.common.txextra import FixedRedirectAgent, TrueHeadersAgent, TrueHeaders
class TestHTTPUtils(unittest.TestCase):
- def test_charset_detection(self):
- no_charset_html = """
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html>
-<head>
- <title>Foo</title>
-"""
- with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
- with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
- with_two_charsets = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8;charset=utf-8">'
- self.assertEqual(META_CHARSET_REGEXP.search(no_charset_html), None)
- self.assertEqual(META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
- self.assertEqual(META_CHARSET_REGEXP.search(
- with_two_charsets).group(1), 'UTF-8')
- self.assertEqual(META_CHARSET_REGEXP.search(with_empty_charset), None)
+ def test_represent_body(self):
+ self.assertEqual(representBody(None), None)
+ self.assertEqual(representBody("spam\xcf\x83"), u'spam\u03c3')
+ self.assertEqual(representBody("\xff\x00"),
+ {'data': '/wA=', 'format': 'base64'})
class TestIPUtils(unittest.TestCase):
def test_is_public_ipv4(self):
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits