[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [fallback-scripts/master] update: py3: Deal with string encoding and type issues



commit a4fbed37e6bca92ea19a2b10fddc15b5c007c3aa
Author: teor <teor@xxxxxxxxxxxxxx>
Date:   Mon Dec 2 15:40:24 2019 +1000

    update: py3: Deal with string encoding and type issues
    
    Part of 28863.
---
 updateFallbackDirs.py | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/updateFallbackDirs.py b/updateFallbackDirs.py
index 57888a9..e97a501 100755
--- a/updateFallbackDirs.py
+++ b/updateFallbackDirs.py
@@ -69,8 +69,8 @@ HAVE_IPADDRESS = False
 try:
   # python 3 builtin, or install package py2-ipaddress
   # there are several ipaddress implementations for python 2
-  # with slightly different semantics with str typed text
-  # fortunately, all our IP addresses are in unicode
+  # with slightly different semantics with bytes
+  # to avoid these issues, we make sure our IP addresses are in six.text_type
   import ipaddress
   HAVE_IPADDRESS = True
 except ImportError:
@@ -601,7 +601,7 @@ def onionoo_fetch(what, **kwargs):
 
   # Unfortunately, the URL is too long for some OS filenames,
   # but we still don't want to get files from different URLs mixed up
-  base_file_name = what + '-' + hashlib.sha1(url).hexdigest()
+  base_file_name = what + '-' + hashlib.sha1(url.encode('ascii')).hexdigest()
 
   full_url_file_name = base_file_name + '.full_url'
   MAX_FULL_URL_LENGTH = 1024
@@ -712,8 +712,6 @@ class Candidate(object):
               'consensus_weight', 'or_addresses', 'dir_address']:
       if not f in details: raise Exception("Document has no %s field."%(f,))
 
-    if not 'contact' in details:
-      details['contact'] = None
     if not 'flags' in details or details['flags'] is None:
       details['flags'] = []
     if (not 'advertised_bandwidth' in details
@@ -724,10 +722,19 @@ class Candidate(object):
     if (not 'effective_family' in details
         or details['effective_family'] is None):
       details['effective_family'] = []
-    if not 'platform' in details:
-      details['platform'] = None
     details['last_changed_address_or_port'] = parse_ts(
                                       details['last_changed_address_or_port'])
+
+    # Handle fields that can have arbitrary bytes, but should be UTF-8
+    if not 'contact' in details:
+      details['contact'] = None
+    else:
+      details['contact'] = six.ensure_text(details['contact'], errors='replace')
+    if not 'platform' in details:
+      details['platform'] = None
+    else:
+      details['platform'] = six.ensure_text(details['platform'], errors='replace')
+
     self._data = details
     self._stable_sort_or_addresses()
 
@@ -761,7 +768,7 @@ class Candidate(object):
   # is_valid_ipv[46]_address by gsathya, karsten, 2013
   @staticmethod
   def is_valid_ipv4_address(address):
-    if not isinstance(address, (str, unicode)):
+    if not isinstance(address, six.string_types):
       return False
 
     # check if there are four period separated values
@@ -779,7 +786,7 @@ class Candidate(object):
 
   @staticmethod
   def is_valid_ipv6_address(address):
-    if not isinstance(address, (str, unicode)):
+    if not isinstance(address, six.string_types):
       return False
 
     # remove brackets
@@ -814,6 +821,7 @@ class Candidate(object):
 
   def _split_dirport(self):
     # Split the dir_address into dirip and dirport
+    self._data['dir_address'] = six.ensure_text(self._data['dir_address'])
     (self.dirip, _dirport) = self._data['dir_address'].split(':', 2)
     self.dirport = int(_dirport)
 
@@ -827,6 +835,7 @@ class Candidate(object):
     for i in self._data['or_addresses']:
       if i != self._data['or_addresses'][0]:
         logging.debug('Secondary IPv4 Address Used for %s: %s'%(self._fpr, i))
+      i = six.ensure_text(i)
       (ipaddr, port) = i.rsplit(':', 1)
       if (ipaddr == self.dirip) and Candidate.is_valid_ipv4_address(ipaddr):
         self.orport = int(port)
@@ -841,17 +850,21 @@ class Candidate(object):
     self.ipv6orport = None
     # Choose the first IPv6 address that uses the same port as the ORPort
     for i in self._data['or_addresses']:
+      i = six.ensure_text(i)
       (ipaddr, port) = i.rsplit(':', 1)
+      port = int(port)
       if (port == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = int(port)
+        self.ipv6orport = port
         return
     # Choose the first IPv6 address in the list
     for i in self._data['or_addresses']:
+      i = six.ensure_text(i)
       (ipaddr, port) = i.rsplit(':', 1)
+      port = int(port)
       if Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = int(port)
+        self.ipv6orport = port
         return
 
   def _compute_version(self):
@@ -1306,16 +1319,16 @@ class Candidate(object):
 
   # strip leading and trailing brackets from an IPv6 address
   # safe to use on non-bracketed IPv6 and on IPv4 addresses
-  # also convert to unicode, and make None appear as ''
+  # also make None appear as ''
   @staticmethod
   def strip_ipv6_brackets(ip):
     if ip is None:
-      return unicode('')
+      return ''
     if len(ip) < 2:
-      return unicode(ip)
+      return ip
     if ip[0] == '[' and ip[-1] == ']':
-      return unicode(ip[1:-1])
-    return unicode(ip)
+      return ip[1:-1]
+    return ip
 
   # are ip_a and ip_b in the same netblock?
   # mask_bits is the size of the netblock



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits