[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Support a DocumentHandler when reading consensus documents
commit 20c36088b536022995d9f10475a6386dbd5d4fdb
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Wed Aug 14 14:04:23 2019 -0700
Support a DocumentHandler when reading consensus documents
When reading a consensus, our callers need the ability to specify the form in
which they wish to receive the document (just the document, a document with the
relays populated, or just the relays).
Unfortunately, CollecTor has separate annotations for v3 consensus and vote
documents, whereas all other descriptor classes have at most one. This requires
rethinking the annotation constants, but for the moment I'm moving on to bigger fish.
---
stem/descriptor/collector.py | 21 +++++++++++++--------
stem/descriptor/networkstatus.py | 2 ++
test/unit/descriptor/collector.py | 9 ++++++++-
3 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 09e94e74..7c2447a2 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -78,10 +78,11 @@ import shutil
import tempfile
import time
+import stem.descriptor
import stem.util.connection
import stem.util.str_tools
-from stem.descriptor import Compression, parse_file
+from stem.descriptor import Compression, DocumentHandler
COLLECTOR_URL = 'https://collector.torproject.org/'
REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
@@ -178,14 +179,14 @@ def get_microdescriptors(start = None, end = None, cache_to = None, timeout = No
yield desc
-def get_consensus(start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3):
+def get_consensus(start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3):
"""
Shorthand for
:func:`~stem.descriptor.collector.CollecTor.get_consensus`
on our singleton instance.
"""
- for desc in get_instance().get_consensus(start, end, cache_to, version, microdescriptor, timeout, retries):
+ for desc in get_instance().get_consensus(start, end, cache_to, document_handler, version, microdescriptor, timeout, retries):
yield desc
@@ -216,7 +217,7 @@ class File(object):
self._guessed_type = File._guess_descriptor_types(path)
self._downloaded_to = None # location we last downloaded to
- def read(self, directory = None, descriptor_type = None, timeout = None, retries = 3):
+ def read(self, directory = None, descriptor_type = None, document_handler = DocumentHandler.ENTRIES, timeout = None, retries = 3):
"""
Provides descriptors from this archive. Descriptors are downloaded or read
from disk as follows...
@@ -239,6 +240,8 @@ class File(object):
:param str descriptor_type: `descriptor type
<https://metrics.torproject.org/collector.html#data-formats>`_, this is
guessed if not provided
+ :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param int timeout: timeout when connection becomes idle, no timeout
applied if **None**
:param int retires: maximum attempts to impose
@@ -269,7 +272,7 @@ class File(object):
tmp_directory = tempfile.mkdtemp()
- for desc in self.read(tmp_directory, descriptor_type, timeout, retries):
+ for desc in self.read(tmp_directory, descriptor_type, document_handler, timeout, retries):
yield desc
shutil.rmtree(tmp_directory)
@@ -281,7 +284,7 @@ class File(object):
# Archives can contain multiple descriptor types, so parsing everything and
# filtering to what we're after.
- for desc in parse_file(path):
+ for desc in stem.descriptor.parse_file(path, document_handler = document_handler):
desc_annotation = type(desc).TYPE_ANNOTATION_NAME
if descriptor_type is None or (desc_annotation and descriptor_type.startswith(desc_annotation)):
@@ -497,7 +500,7 @@ class CollecTor(object):
for desc in f.read(cache_to, 'microdescriptor', timeout = timeout, retries = retries):
yield desc
- def get_consensus(self, start = None, end = None, cache_to = None, version = 3, microdescriptor = False, timeout = None, retries = 3):
+ def get_consensus(self, start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, timeout = None, retries = 3):
"""
Provides consensus router status entries published during the given time
range, sorted oldest to newest.
@@ -506,6 +509,8 @@ class CollecTor(object):
:param datetime.datetime end: time range to end with
:param str cache_to: directory to cache archives into, if an archive is
available here it is not downloaded
+ :var stem.descriptor.__init__.DocumentHandler document_handler: method in
+ which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param int version: consensus variant to retrieve (versions 2 or 3)
:param bool microdescriptor: provides the microdescriptor consensus if
**True**, standard consensus otherwise
@@ -536,7 +541,7 @@ class CollecTor(object):
# TODO: document vs router status entries (ie. DocumentType)?
for f in self.files(desc_type, start, end):
- for desc in f.read(cache_to, desc_type, timeout = timeout, retries = retries):
+ for desc in f.read(cache_to, desc_type, document_handler, timeout = timeout, retries = retries):
yield desc
def index(self, compression = 'best'):
diff --git a/stem/descriptor/networkstatus.py b/stem/descriptor/networkstatus.py
index b0589f2a..dd7759f5 100644
--- a/stem/descriptor/networkstatus.py
+++ b/stem/descriptor/networkstatus.py
@@ -1032,6 +1032,8 @@ class NetworkStatusDocumentV3(NetworkStatusDocument):
Added the bandwidth_file_digest attributbute.
"""
+ TYPE_ANNOTATION_NAME = 'network-status-consensus-3' # TODO: can also be network-status-vote-3
+
ATTRIBUTES = {
'version': (None, _parse_header_network_status_version_line),
'version_flavor': ('ns', _parse_header_network_status_version_line),
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index f5bed87e..7d1f0205 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -8,7 +8,7 @@ import unittest
import stem.prereq
-from stem.descriptor import Compression
+from stem.descriptor import Compression, DocumentHandler
from stem.descriptor.collector import CollecTor, File
from test.unit.descriptor import get_resource
from test.unit.descriptor.data.collector.index import EXAMPLE_INDEX
@@ -255,6 +255,13 @@ class TestCollector(unittest.TestCase):
self.assertEqual('RouterStatusEntryV3', type(f).__name__)
self.assertEqual('000A10D43011EA4928A35F610405F92B4433B4DC', f.fingerprint)
+ descriptors = list(stem.descriptor.collector.get_consensus(document_handler = DocumentHandler.DOCUMENT))
+ self.assertEqual(2, len(descriptors))
+
+ f = descriptors[0]
+ self.assertEqual('NetworkStatusDocumentV3', type(f).__name__)
+ self.assertEqual(35, len(f.routers))
+
# this archive shouldn't have any v2 or microdescriptor consensus data
self.assertEqual(0, len(list(stem.descriptor.collector.get_consensus(version = 2))))
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits