[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [ooni-probe/master] Ignore empty charset values



commit d80a68098dc4fd13a4963e26d6c4e5ed2ad31b43
Author: Arturo Filastò <arturo@xxxxxxxxxxx>
Date:   Sun May 8 17:55:46 2016 +0200

    Ignore empty charset values
---
 ooni/templates/httpt.py      | 2 +-
 ooni/tests/test_templates.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ooni/templates/httpt.py b/ooni/templates/httpt.py
index 2a17f5b..51fba1a 100644
--- a/ooni/templates/httpt.py
+++ b/ooni/templates/httpt.py
@@ -17,7 +17,7 @@ from ooni.utils.net import StringProducer, userAgents
 from ooni.utils.trueheaders import TrueHeaders
 from ooni.errors import handleAllFailures
 
-META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]*)')
+META_CHARSET_REGEXP = re.compile('<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"\']*([^\s"\'/>]+)')
 
 class InvalidSocksProxyOption(Exception):
     pass
diff --git a/ooni/tests/test_templates.py b/ooni/tests/test_templates.py
index 5b2c77a..e8fe636 100644
--- a/ooni/tests/test_templates.py
+++ b/ooni/tests/test_templates.py
@@ -54,8 +54,10 @@ class TestHTTPT(unittest.TestCase):
         <title>Foo</title>
 """
         with_charset_html = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
+        with_empty_charset = no_charset_html + '\n<meta http-equiv="Content-Type" content="text/html; charset=">'
         self.assertEqual(httpt.META_CHARSET_REGEXP.search(no_charset_html), None)
         self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_charset_html).group(1), 'iso-8859-1')
+        self.assertEqual(httpt.META_CHARSET_REGEXP.search(with_empty_charset), None)
 
 class TestDNST(unittest.TestCase):
     def setUp(self):



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits