[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Making descriptor reader persistence more convenient
commit 10cb30b546016f975559bdf1d8dd785ee516ae84
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Sat May 5 16:54:24 2012 -0700
Making descriptor reader persistence more convenient
Our current functions for loading/saving processed file listings are fine if
you want error handling and a great deal of control. However, I suspect that
most callers would prefer for this to be an attribute of the reader itself.
Adding an argument that performs best-effort persistence of our processed files
listing.
---
stem/descriptor/reader.py | 22 +++++++++++++++++++++-
test/integ/descriptor/reader.py | 29 +++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 1 deletions(-)
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 0bddd53..8f00f5f 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -191,17 +191,25 @@ class DescriptorReader:
waiting for our caller to fetch some of them. This is included to avoid
unbounded memory usage.
+ Our persistence_path argument is a convenient method to persist the listing
+ of files we have processed between runs, however it doesn't allow for error
+ handling. If you want that then use the load/save_processed_files functions
+ instead.
+
Arguments:
targets (list) - paths for files or directories to be read from
follow_links (bool) - determines if we'll follow symlinks when traversing
directories
buffer_size (int) - descriptors we'll buffer before waiting for some to
be read, this is unbounded if zero
+ persistence_path (str) - if set we will load and save processed file
+ listings from this path, errors are ignored
"""
- def __init__(self, targets, follow_links = False, buffer_size = 100):
+ def __init__(self, targets, follow_links = False, buffer_size = 100, persistence_path = None):
self._targets = targets
self._follow_links = follow_links
+ self._persistence_path = persistence_path
self._skip_listeners = []
self._processed_files = {}
@@ -218,6 +226,12 @@ class DescriptorReader:
# FINISHED entry is used by the reading thread to indicate the end.
self._unreturned_descriptors = Queue.Queue(buffer_size)
+
+ if self._persistence_path:
+ try:
+ processed_files = load_processed_files(self._persistence_path)
+ self.set_processed_files(processed_files)
+ except: pass
def get_processed_files(self):
"""
@@ -311,6 +325,12 @@ class DescriptorReader:
self._reader_thread.join()
self._reader_thread = None
+
+ if self._persistence_path:
+ try:
+ processed_files = self.get_processed_files()
+ save_processed_files(self._persistence_path, processed_files)
+ except: pass
def _read_descriptor_files(self):
new_processed_files = {}
diff --git a/test/integ/descriptor/reader.py b/test/integ/descriptor/reader.py
index 5c84704..0e3467c 100644
--- a/test/integ/descriptor/reader.py
+++ b/test/integ/descriptor/reader.py
@@ -207,6 +207,35 @@ class TestDescriptorReader(unittest.TestCase):
time.sleep(0.01)
self.assertTrue(reader.get_buffered_descriptor_count() <= 2)
+ def test_persistence_path(self):
+ """
+ Check that the persistence_path argument loads and saves a processed
+ files listing.
+ """
+
+ persistence_path = _get_processed_files_path()
+ descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, "example_descriptor")
+
+ # First run where the persistence_path doesn't yet exist. This just tests
+ # the saving functionality.
+
+ reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path)
+ with reader: self.assertEqual(1, len(list(reader)))
+
+ # check that we've saved reading example_descriptor
+ self.assertTrue(os.path.exists(persistence_path))
+
+ with open(persistence_path) as persistence_file:
+ persistance_file_contents = persistence_file.read()
+ self.assertTrue(persistance_file_contents.startswith(descriptor_path))
+
+ # Try running again with a new reader but the same persistence path, if it
+ # reads and takes the persistence_path into account then it won't read the
+ # descriptor file. This in essence just tests its loading functionality.
+
+ reader = stem.descriptor.reader.DescriptorReader([descriptor_path], persistence_path = persistence_path)
+ with reader: self.assertEqual(0, len(list(reader)))
+
def test_archived_uncompressed(self):
"""
Checks that we can read descriptors from an uncompressed archive.
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits