[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Helper to guess descriptor type
commit bee688e60c19c5ce8879ffbcc1e0c6b6c2e8597c
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Thu Jul 11 14:35:40 2019 -0700
Helper to guess descriptor type
Ideally our index would include descriptor types, but without that the next
best thing is to guess based on file paths.
---
stem/descriptor/collector.py | 55 +++++++++++++++++++++++++++++++++++++++
test/unit/descriptor/collector.py | 15 ++++++++++-
2 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/stem/descriptor/collector.py b/stem/descriptor/collector.py
index 1742c596..d94d5871 100644
--- a/stem/descriptor/collector.py
+++ b/stem/descriptor/collector.py
@@ -70,6 +70,40 @@ import stem.util.str_tools
COLLECTOR_URL = 'https://collector.torproject.org/'
REFRESH_INDEX_RATE = 3600 # get new index if cached copy is an hour old
+# mapping of path prefixes to their descriptor type (sampled 7/11/19)
+
+COLLECTOR_DESC_TYPES = {
+ 'archive/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2',
+ 'archive/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3',
+ 'archive/bridge-descriptors/statuses/': 'bridge-network-status 1.1',
+ 'archive/bridge-pool-assignments/': 'bridge-pool-assignment 1.0',
+ 'archive/exit-lists/': 'tordnsel 1.0',
+ 'archive/relay-descriptors/bandwidths/': 'bandwidth-file 1.0',
+ 'archive/relay-descriptors/certs': 'dir-key-certificate-3 1.0',
+ 'archive/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0',
+ 'archive/relay-descriptors/extra-infos/': 'extra-info 1.0',
+ 'archive/relay-descriptors/microdescs/': ('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'),
+ 'archive/relay-descriptors/server-descriptors/': 'server-descriptor 1.0',
+ 'archive/relay-descriptors/statuses/': 'network-status-2 1.0',
+ 'archive/relay-descriptors/tor/': 'directory 1.0',
+ 'archive/relay-descriptors/votes/': 'network-status-vote-3 1.0',
+ 'archive/torperf/': 'torperf 1.0',
+ 'archive/webstats/': (),
+ 'recent/bridge-descriptors/extra-infos/': 'bridge-extra-info 1.3',
+ 'recent/bridge-descriptors/server-descriptors/': 'bridge-server-descriptor 1.2',
+ 'recent/bridge-descriptors/statuses/': 'bridge-network-status 1.2',
+ 'recent/exit-lists/': 'tordnsel 1.0',
+ 'recent/relay-descriptors/bandwidths/': 'bandwidth-file 1.0',
+ 'recent/relay-descriptors/consensuses/': 'network-status-consensus-3 1.0',
+ 'recent/relay-descriptors/extra-infos/': 'extra-info 1.0',
+ 'recent/relay-descriptors/microdescs/consensus-microdesc/': 'network-status-microdesc-consensus-3 1.0',
+ 'recent/relay-descriptors/microdescs/micro/': 'microdescriptor 1.0',
+ 'recent/relay-descriptors/server-descriptors/': 'server-descriptor 1.0',
+ 'recent/relay-descriptors/votes/': 'network-status-vote-3 1.0',
+ 'recent/torperf/': 'torperf 1.1',
+ 'recent/webstats/': (),
+}
+
def _download(url, compression, timeout, retries):
"""
@@ -135,6 +169,27 @@ class File(object):
self.path = path
self.size = size
self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M')
+ self._guessed_type = None
+
+ def guess_descriptor_types(self):
+ """
+ Descriptor @type this file is expected to have based on its path. If unable
+ to determine any, this tuple is empty.
+
+ :returns: **tuple** with the descriptor types this file is expected to have
+ """
+
+ if self._guessed_type is None:
+ guessed_type = ()
+
+ for path_prefix, types in COLLECTOR_DESC_TYPES.items():
+ if self.path.startswith(path_prefix):
+ guessed_type = (types,) if isinstance(types, str) else types
+ break
+
+ self._guessed_type = guessed_type
+
+ return self._guessed_type
class CollecTor(object):
diff --git a/test/unit/descriptor/collector.py b/test/unit/descriptor/collector.py
index 3403ee50..609a4929 100644
--- a/test/unit/descriptor/collector.py
+++ b/test/unit/descriptor/collector.py
@@ -9,7 +9,7 @@ import unittest
import stem.prereq
from stem.descriptor import Compression
-from stem.descriptor.collector import CollecTor
+from stem.descriptor.collector import CollecTor, File
from test.unit.descriptor import get_resource
from test.unit.descriptor.data.collector_index import EXAMPLE_INDEX
@@ -135,3 +135,16 @@ class TestCollector(unittest.TestCase):
self.assertEqual(test_path, extrainfo_file.path)
self.assertEqual(6459884, extrainfo_file.size)
self.assertEqual(datetime.datetime(2016, 6, 23, 9, 54), extrainfo_file.last_modified)
+
+ def test_guess_descriptor_types(self):
+ f = File('archive/bridge-descriptors/extra-infos/bridge-extra-infos-2008-05.tar.xz', 377644, '2016-09-04 09:21')
+ self.assertEqual(('bridge-extra-info 1.3',), f.guess_descriptor_types())
+
+ f = File('archive/relay-descriptors/microdescs/microdescs-2014-01.tar.xz', 7515396, '2014-02-07 03:59')
+ self.assertEqual(('network-status-microdesc-consensus-3 1.0', 'microdescriptor 1.0'), f.guess_descriptor_types())
+
+ f = File('archive/webstats/webstats-2015-03.tar', 20480, '2018-03-19 16:07')
+ self.assertEqual((), f.guess_descriptor_types())
+
+ f = File('archive/no_such_file.tar', 20480, '2018-03-19 16:07')
+ self.assertEqual((), f.guess_descriptor_types())
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits