[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Parsing descriptor files and annotations
commit e20fd3efae6a0e3a7f647017da9f62ccc83aebcc
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Fri Mar 23 07:31:41 2012 -0700
Parsing descriptor files and annotations
Completely untested change to finish the implementation of server descriptors.
This parses a cached descriptor file into individual descriptors and
annotations.
Next come unit and integration tests to start exercising it in an automated way.
---
stem/descriptor/server_descriptor.py | 115 ++++++++++++++++++++++++++++++++--
1 files changed, 110 insertions(+), 5 deletions(-)
diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 80b13ac..6d16400 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -53,7 +53,76 @@ def parse_server_descriptors_v2(path, descriptor_file):
Iterates over the version 2 server descriptors in a descriptor file.
"""
- pass
+ # Cached descriptors consist of annotations followed by the descriptor
+ # itself. For instance...
+ #
+ # @downloaded-at 2012-03-14 16:31:05
+ # @source "145.53.65.130"
+ # router caerSidi 71.35.143.157 9001 0 0
+ # platform Tor 0.2.1.30 on Linux x86_64
+ # <rest of the descriptor content>
+ # router-signature
+ # -----BEGIN SIGNATURE-----
+ # <signature for the above descriptor>
+ # -----END SIGNATURE-----
+ #
+ # Metrics descriptor files are the same, but lack any annotations. The
+ # parser below simply does the following...
+ #
+ # - parse as annotations until we get to ENTRY_START
+ # - parse as descriptor content until we get to ENTRY_END followed by the
+ # end of the signature block
+ # - construct a descriptor and provide it back to the caller
+
+ while descriptor_file:
+ annotations = _read_until_keyword(ENTRY_START, descriptor_file)
+ descriptor_content = _read_until_keyword(ENTRY_END, descriptor_file)
+
+ # we've reached the 'router-signature', now include the pgp style block
+ block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+ descriptor_content += _read_until_keyword(block_end_prefix, descriptor_file, True)
+
+ # If the file has ending annotations (ie, non-descriptor text after the
+ # last descriptor) then we won't have any descriptor content at this point.
+ # This is fine. Those ending annotations are simply never returned to the
+ # caller.
+
+ if descriptor_content:
+ yield ServerDescriptorV2(descriptr_content, annotations = annotations)
+
+def _read_until_keyword(keyword, descriptor_file, inclusive = False):
+ """
+ Reads from the descriptor file until we get to the given keyword or reach the
+ end of the file.
+
+ Arguments:
+ keyword (str) - keyword we want to read until
+ descriptor_file (file) - file with the descriptor content
+ inclusive (bool) - includes the line with the keyword if True
+
+ Returns:
+ list with the lines until we find the keyword
+ """
+
+ content = []
+
+ while descriptor_file:
+ last_position = descriptor_file.tell()
+ line = descriptor_file.readline()
+
+ if not line: continue # blank line
+ elif " " in line: line_keyword = line.split(" ", 1)[0]
+ else: line_keyword = line
+
+ if line_keyword == keyword:
+ if inclusive: content.append(line)
+ else: descriptor_file.seek(last_position)
+
+ break
+ else:
+ content.append(line)
+
+ return content
def _get_psudo_pgp_block(remaining_contents):
"""
@@ -139,7 +208,7 @@ class ServerDescriptorV2(Descriptor):
exit_policy = []
- def __init__(self, contents, validate = True):
+ def __init__(self, contents, validate = True, annotations = None):
"""
Version 2 server descriptor constructor, created from an individual relay's
descriptor content (as provided by "GETINFO desc/*", cached descriptors,
@@ -150,9 +219,10 @@ class ServerDescriptorV2(Descriptor):
malformed data.
Arguments:
- contents (str) - descriptor content provided by the relay
- validate (bool) - checks the validity of the descriptor's content if True,
- skips these checks otherwise
+ contents (str) - descriptor content provided by the relay
+ validate (bool) - checks the validity of the descriptor's content if
+ True, skips these checks otherwise
+ annotations (list) - lines that appeared prior to the descriptor
Raises:
ValueError if the contents is malformed and validate is True
@@ -160,6 +230,15 @@ class ServerDescriptorV2(Descriptor):
Descriptor.__init__(self, contents)
+ self._annotation_lines = annotations
+ self._annotation_dict = {}
+
+ for line in annotations:
+ if " " in line:
+ key, value = line.split(" ", 1)
+ self._annotation_dict[key] = value
+ else: self._annotation_dict[line] = None
+
# A descriptor contains a series of 'keyword lines' which are simply a
# keyword followed by an optional value. Lines can also be followed by a
# signature block.
@@ -338,6 +417,32 @@ class ServerDescriptorV2(Descriptor):
else:
unrecognized_entries.append(line)
+ def get_annotations(self):
+ """
+ Provides content that appeared prior to the descriptor. If this comes from
+ the cached-descriptors file then this commonly contains content like...
+
+ @downloaded-at 2012-03-18 21:18:29
+ @source "173.254.216.66"
+
+ Returns:
+ dict with the key/value pairs in our annotations
+ """
+
+ return self._annotation_dict
+
+ def get_annotation_lines(self):
+ """
+ Provides the lines of content that appeared prior to the descriptor. This
+ is the same as the get_annotations() results, but with the unparsed lines
+ and ordering retained.
+
+ Returns:
+ list with the lines of annotation that came before this descriptor
+ """
+
+ return self._annotation_lines
+
def is_valid(self):
"""
Validates that our content matches our signature.
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits