[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [stem/master] Parsing descriptor files and annotations



commit e20fd3efae6a0e3a7f647017da9f62ccc83aebcc
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date:   Fri Mar 23 07:31:41 2012 -0700

    Parsing descriptor files and annotations
    
    Completely untested change to finish the implementation of server descriptors.
    This parses a cached descriptor file into individual descriptors and
    annotations.
    
    Next comes unit and integ tests to start exercising it in an automated way.
---
 stem/descriptor/server_descriptor.py |  115 ++++++++++++++++++++++++++++++++--
 1 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/stem/descriptor/server_descriptor.py b/stem/descriptor/server_descriptor.py
index 80b13ac..6d16400 100644
--- a/stem/descriptor/server_descriptor.py
+++ b/stem/descriptor/server_descriptor.py
@@ -53,7 +53,76 @@ def parse_server_descriptors_v2(path, descriptor_file):
   Iterates over the verion 2 server descriptors in a descriptor file.
   """
   
-  pass
+  # Cached descriptors consist of annotations followed by the descriptor
+  # itself. For instance...
+  #
+  #   @downloaded-at 2012-03-14 16:31:05
+  #   @source "145.53.65.130"
+  #   router caerSidi 71.35.143.157 9001 0 0
+  #   platform Tor 0.2.1.30 on Linux x86_64
+  #   <rest of the descriptor content>
+  #   router-signature
+  #   -----BEGIN SIGNATURE-----
+  #   <signature for the above descriptor>
+  #   -----END SIGNATURE-----
+  #
+  # Metrics descriptor files are the same, but lack any annotations. The
+  # loop below simply does the following...
+  #
+  #   - parse as annotations until we get to ENTRY_START
+  #   - parse as descriptor content until we get to ENTRY_END followed by the
+  #     end of the signature block
+  #   - construct a descriptor and provide it back to the caller
+  
+  while descriptor_file:
+    annotations = _read_until_keyword(ENTRY_START, descriptor_file)
+    descriptor_content = _read_until_keyword(ENTRY_END, descriptor_file)
+    
+    # we've reached the 'router-signature', now include the pgp style block
+    block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
+    descriptor_content += _read_until_keyword(block_end_prefix, descriptor_file, True)
+    
+    # If the file has ending annotations (ie, non-descriptor text after the
+    # last descriptor) then we won't have any descriptor content at this point.
+    # This is fine. Those ending annotations are simply never returned to the
+    # caller.
+    
+    if descriptor_content:
+      yield ServerDescriptorV2(descriptr_content, annotations = annotations)
+
+def _read_until_keyword(keyword, descriptor_file, inclusive = False):
+  """
+  Reads from the descriptor file until we get to the given keyword or reach the
+  end of the file.
+  
+  Arguments:
+    keyword (str)          - keyword we want to read until
+    descriptor_file (file) - file with the descriptor content
+    inclusive (bool)       - includes the line with the keyword if True
+  
+  Returns:
+    list with the lines until we find the keyword
+  """
+  
+  content = []
+  
+  while descriptor_file:
+    last_position = descriptor_file.tell()
+    line = descriptor_file.readline()
+    
+    if not line: continue # blank line
+    elif " " in line: line_keyword = line.split(" ", 1)[0]
+    else: line_keyword = line
+    
+    if line_keyword == keyword:
+      if inclusive: content.append(line)
+      else: descriptor_file.seek(last_position)
+      
+      break
+    else:
+      content.append(line)
+  
+  return content
 
 def _get_psudo_pgp_block(remaining_contents):
   """
@@ -139,7 +208,7 @@ class ServerDescriptorV2(Descriptor):
   
   exit_policy = []
   
-  def __init__(self, contents, validate = True):
+  def __init__(self, contents, validate = True, annotations = None):
     """
     Version 2 server descriptor constructor, created from an individual relay's
     descriptor content (as provided by "GETINFO desc/*", cached descriptors,
@@ -150,9 +219,10 @@ class ServerDescriptorV2(Descriptor):
     malformed data.
     
     Arguments:
-      contents (str)  - descriptor content provided by the relay
-      validate (bool) - checks the validity of the descriptor's content if True,
-                        skips these checks otherwise
+      contents (str)     - descriptor content provided by the relay
+      validate (bool)    - checks the validity of the descriptor's content if
+                           True, skips these checks otherwise
+      annotations (list) - lines that appeared prior to the descriptor
     
     Raises:
       ValueError if the contents is malformed and validate is True
@@ -160,6 +230,15 @@ class ServerDescriptorV2(Descriptor):
     
     Descriptor.__init__(self, contents)
     
+    self._annotation_lines = annotations
+    self._annotation_dict = {}
+    
+    for line in annotations:
+      if " " in line:
+        key, value = line.split(" ", 1)
+        self._annotation_dict[key] = value
+      else: self._annotation_dict[line] = None
+    
     # A descriptor contains a series of 'keyword lines' which are simply a
     # keyword followed by an optional value. Lines can also be followed by a
     # signature block.
@@ -338,6 +417,32 @@ class ServerDescriptorV2(Descriptor):
       else:
         unrecognized_entries.append(line)
   
+  def get_annotations(self):
+    """
+    Provides content that appeared prior to the descriptor. If this comes from
+    the cached-descriptors file then this commonly contains content like...
+    
+      @downloaded-at 2012-03-18 21:18:29
+      @source "173.254.216.66"
+    
+    Returns:
+      dict with the key/value pairs in our annotations
+    """
+    
+    return self._annotation_dict
+  
+  def get_annotation_lines(self):
+    """
+    Provides the lines of content that appeared prior to the descriptor. This
+    is the same as the get_annotations() results, but with the unparsed lines
+    and ordering retained.
+    
+    Returns:
+      list with the lines of annotation that came before this descriptor
+    """
+    
+    return self._annotation_lines
+  
   def is_valid(self):
     """
     Validates that our content matches our signature.



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits