[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Support for compressed descriptors
commit 90ec648ed075245f6ed2bae42dd4c6968e15fab1
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Sun Jul 21 15:57:47 2013 -0700
Support for compressed descriptors
Ummm... well, that was easy. When I was initially looking into tor's directory
responses I had trouble getting valid compressed results. Kostas suspected
that the responses might have been truncated. Oh well, maybe it was a wget issue. In
either case, adding compression support to our module was trivial.
---
stem/descriptor/remote.py | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/stem/descriptor/remote.py b/stem/descriptor/remote.py
index 8ec6f1d..777f769 100644
--- a/stem/descriptor/remote.py
+++ b/stem/descriptor/remote.py
@@ -74,6 +74,7 @@ import sys
import threading
import time
import urllib2
+import zlib
import stem.descriptor
@@ -335,15 +336,17 @@ class Query(object):
self.download_url = self._pick_url(use_authority)
self.start_time = time.time()
- response = urllib2.urlopen(self.download_url, timeout = self.timeout)
+ response = urllib2.urlopen(self.download_url, timeout = self.timeout).read()
- # This sucks. We need to read the full response into memory before
- # processing the content. This is because urllib2 returns a 'file like'
- # object that lacks tell() or seek(). Hence we need to read it into our
- # own buffer that does support these.
+ if self.download_url.endswith('.z'):
+ response = zlib.decompress(response)
- response = io.BytesIO(response.read().strip())
- self._results = stem.descriptor.parse_file(response, self.descriptor_type, validate = self.validate, document_handler = self.document_handler)
+ self._results = stem.descriptor.parse_file(
+ io.BytesIO(response.strip()),
+ self.descriptor_type,
+ validate = self.validate,
+ document_handler = self.document_handler,
+ )
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
@@ -434,7 +437,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/server/all'
+ resource = '/tor/server/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -443,7 +446,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -462,7 +465,7 @@ class DescriptorDownloader(object):
fingerprints (this is due to a limit on the url length by squid proxies).
"""
- resource = '/tor/extra/all'
+ resource = '/tor/extra/all.z'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
@@ -471,7 +474,7 @@ class DescriptorDownloader(object):
if len(fingerprints) > MAX_DESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i descriptors at a time by their fingerprints" % MAX_DESCRIPTOR_BATCH_SIZE)
- resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
+ resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
return self._query(resource)
@@ -498,7 +501,7 @@ class DescriptorDownloader(object):
if len(hashes) > MAX_MICRODESCRIPTOR_BATCH_SIZE:
raise ValueError("Unable to request more than %i microdescriptors at a time by their hashes" % MAX_MICRODESCRIPTOR_BATCH_SIZE)
- return self._query('/tor/micro/d/%s' % '-'.join(hashes))
+ return self._query('/tor/micro/d/%s.z' % '-'.join(hashes))
def get_consensus(self, document_handler = stem.descriptor.DocumentHandler.ENTRIES, authority_v3ident = None):
"""
@@ -520,7 +523,7 @@ class DescriptorDownloader(object):
if authority_v3ident:
resource += '/%s' % authority_v3ident
- return self._query(resource, document_handler = document_handler)
+ return self._query(resource + '.z', document_handler = document_handler)
def _query(self, resource, descriptor_type = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES):
"""
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits