[tor-commits] [stem/master] Remove deprecated modules
commit c1c4e7a288d26a2895838f2ed121786078db42fe
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Mon Jan 6 15:07:47 2020 -0800
Remove deprecated modules
We're dropping stem.descriptor's reader and export modules due to lack of use...
* I wrote stem.descriptor.reader at Karsten's suggestion to read descriptors
from disk and track when those on-disk files change. The design seemed
intended for use within CollecTor, but it was never used there.
In practice stem.descriptor.from_file() provides a simpler mechanism to
read descriptors from disk (see the batch-reading sketch after this list).
* stem.descriptor.export was contributed by a university student in Stem's
early days. I've never used it, nor have I found anyone else who does.
This module serializes descriptors to CSV, which is moot since
descriptors already have a string representation we can read and
write...
  with open('/path/to/descriptor', 'w') as descriptor_file:
    descriptor_file.write(str(my_descriptor))

  my_descriptor = stem.descriptor.from_file('/path/to/descriptor', 'server-descriptor 1.0')
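For the reader module's batch use case, walking a directory and handing each
file to stem.descriptor.parse_file() covers the same ground. A minimal sketch
(the directory path below is hypothetical)...

  import os

  import stem.descriptor

  # recursively read every descriptor file beneath a directory
  for root, _, files in os.walk('/tmp/archived_descriptors'):
    for filename in files:
      for desc in stem.descriptor.parse_file(os.path.join(root, filename)):
        print(desc)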
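Likewise, the export module's CSV output is a few lines with the stdlib csv
module. A sketch assuming server descriptors (whose nickname, address, and
published attributes the removed unit tests also reference), with
hypothetical input and output paths...

  import csv

  import stem.descriptor

  FIELDS = ('nickname', 'address', 'published')

  with open('/tmp/descriptors.csv', 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(FIELDS)  # header row

    for desc in stem.descriptor.parse_file('/tmp/server-descriptors', 'server-descriptor 1.0'):
      writer.writerow([getattr(desc, field) for field in FIELDS])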
---
docs/api.rst | 10 +-
docs/api/descriptor/export.rst | 5 -
docs/api/descriptor/reader.rst | 5 -
docs/contents.rst | 5 +-
stem/control.py | 1 -
stem/descriptor/__init__.py | 12 +-
stem/descriptor/export.py | 111 -------
stem/descriptor/reader.py | 563 -----------------------------------
test/integ/control/controller.py | 1 -
test/settings.cfg | 3 -
test/unit/descriptor/export.py | 94 ------
test/unit/descriptor/reader.py | 625 ---------------------------------------
12 files changed, 7 insertions(+), 1428 deletions(-)
diff --git a/docs/api.rst b/docs/api.rst
index cbbf0dd0..58604e90 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -21,11 +21,9 @@ Controller
Descriptors
-----------
-To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file` for
-individual files and `stem.descriptor.reader
-<api/descriptor/reader.html>`_ for batches. You can also use
-`stem.descriptor.remote <api/descriptor/remote.html>`_ to download descriptors
-remotely like Tor does.
+To read descriptors from disk use :func:`~stem.descriptor.__init__.parse_file`.
+You can also use `stem.descriptor.remote <api/descriptor/remote.html>`_ to
+download descriptors remotely like Tor does.
* **Classes**
@@ -41,10 +39,8 @@ remotely like Tor does.
* `stem.descriptor.certificate <api/descriptor/certificate.html>`_ - `Ed25519 certificates <https://gitweb.torproject.org/torspec.git/tree/cert-spec.txt>`_.
* `stem.directory <api/directory.html>`_ - Directory authority and fallback directory information.
-* `stem.descriptor.reader <api/descriptor/reader.html>`_ - Reads and parses descriptor files from disk.
* `stem.descriptor.remote <api/descriptor/remote.html>`_ - Downloads descriptors from directory mirrors and authorities.
* `stem.descriptor.collector <api/descriptor/collector.html>`_ - Downloads past descriptors from `CollecTor <https://metrics.torproject.org/collector.html>`_.
-* `stem.descriptor.export <api/descriptor/export.html>`_ - Exports descriptors to other formats.
Utilities
---------
diff --git a/docs/api/descriptor/export.rst b/docs/api/descriptor/export.rst
deleted file mode 100644
index a39e7773..00000000
--- a/docs/api/descriptor/export.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Descriptor Exporter
-===================
-
-.. automodule:: stem.descriptor.export
-
diff --git a/docs/api/descriptor/reader.rst b/docs/api/descriptor/reader.rst
deleted file mode 100644
index 89c1a69f..00000000
--- a/docs/api/descriptor/reader.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Descriptor Reader
-=================
-
-.. automodule:: stem.descriptor.reader
-
diff --git a/docs/contents.rst b/docs/contents.rst
index 87e75220..99ca686b 100644
--- a/docs/contents.rst
+++ b/docs/contents.rst
@@ -46,6 +46,7 @@ Contents
api/descriptor/certificate
api/descriptor/collector
api/descriptor/descriptor
+ api/descriptor/remote
api/descriptor/server_descriptor
api/descriptor/extrainfo_descriptor
api/descriptor/microdescriptor
@@ -54,10 +55,6 @@ Contents
api/descriptor/hidden_service
api/descriptor/tordnsel
- api/descriptor/export
- api/descriptor/reader
- api/descriptor/remote
-
api/util/init
api/util/conf
api/util/connection
diff --git a/stem/control.py b/stem/control.py
index 4adec330..9fda9d34 100644
--- a/stem/control.py
+++ b/stem/control.py
@@ -258,7 +258,6 @@ import threading
import time
import stem.descriptor.microdescriptor
-import stem.descriptor.reader
import stem.descriptor.router_status_entry
import stem.descriptor.server_descriptor
import stem.exit_policy
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index fff08910..11fff944 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -110,12 +110,10 @@ __all__ = [
'bandwidth_file',
'certificate',
'collector',
- 'export',
'extrainfo_descriptor',
'hidden_service',
'microdescriptor',
'networkstatus',
- 'reader',
'remote',
'router_status_entry',
'server_descriptor',
@@ -297,10 +295,6 @@ def parse_file(descriptor_file, descriptor_type = None, validate = False, docume
* The filename if it matches something from tor's data directory. For
instance, tor's 'cached-descriptors' contains server descriptors.
- This is a handy function for simple usage, but if you're reading multiple
- descriptor files you might want to consider the
- :class:`~stem.descriptor.reader.DescriptorReader`.
-
Descriptor types include the following, plus further minor versions (ie.
if we support 1.1 then we also support everything from 1.0 and most things
from 1.2, but not 2.0)...
@@ -940,9 +934,9 @@ class Descriptor(object):
def get_archive_path(self):
"""
If this descriptor came from an archive then provides its path within the
- archive. This is only set if the descriptor came from a
- :class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
- descriptor didn't come from an archive.
+ archive. This is only set if the descriptor was read by
+  :func:`~stem.descriptor.__init__.parse_file` from an archive, and **None**
+ otherwise.
:returns: **str** with the descriptor's path within the archive
"""
diff --git a/stem/descriptor/export.py b/stem/descriptor/export.py
deleted file mode 100644
index 35835d7c..00000000
--- a/stem/descriptor/export.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright 2012-2020, Damian Johnson and The Tor Project
-# See LICENSE for licensing information
-
-"""
-Toolkit for exporting descriptors to other formats.
-
-**Module Overview:**
-
-::
-
- export_csv - Exports descriptors to a CSV
- export_csv_file - Writes exported CSV output to a file
-
-.. deprecated:: 1.7.0
-
- This module will likely be removed in Stem 2.0 due to lack of usage. If you
-  use this module please `let me know <https://www.atagar.com/contact/>`_.
-"""
-
-import io
-import csv
-
-import stem.descriptor
-import stem.prereq
-
-
-class _ExportDialect(csv.excel):
- lineterminator = '\n'
-
-
-def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
- """
- Provides a newline separated CSV for one or more descriptors. If simply
- provided with descriptors then the CSV contains all of its attributes,
- labeled with a header row. Either 'included_fields' or 'excluded_fields' can
- be used for more granular control over its attributes and the order.
-
- :param Descriptor,list descriptors: either a
- :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
- :param list included_fields: attributes to include in the csv
- :param list excluded_fields: attributes to exclude from the csv
- :param bool header: if **True** then the first line will be a comma separated
- list of the attribute names
-
- :returns: **str** of the CSV for the descriptors, one per line
- :raises: **ValueError** if descriptors contain more than one descriptor type
- """
-
- output_buffer = io.StringIO()
- export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
- return output_buffer.getvalue()
-
-
-def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
- """
- Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
- written directly to a file.
-
- :param file output_file: file to be written to
- :param Descriptor,list descriptors: either a
- :class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
- :param list included_fields: attributes to include in the csv
- :param list excluded_fields: attributes to exclude from the csv
- :param bool header: if **True** then the first line will be a comma separated
- list of the attribute names
-
-  :returns: **None**, the CSV is written directly to output_file
- :raises: **ValueError** if descriptors contain more than one descriptor type
- """
-
- if isinstance(descriptors, stem.descriptor.Descriptor):
- descriptors = (descriptors,)
-
- if not descriptors:
- return
-
- descriptor_type = type(descriptors[0])
- descriptor_type_label = descriptor_type.__name__
- included_fields = list(included_fields)
-
- # If the user didn't specify the fields to include then export everything,
- # ordered alphabetically. If they did specify fields then make sure that
- # they exist.
-
- desc_attr = sorted(vars(descriptors[0]).keys())
-
- if included_fields:
- for field in included_fields:
- if field not in desc_attr:
- raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ', '.join(desc_attr)))
- else:
- included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
-
- for field in excluded_fields:
- try:
- included_fields.remove(field)
- except ValueError:
- pass
-
- writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
-
- if header:
- writer.writeheader()
-
- for desc in descriptors:
- if not isinstance(desc, stem.descriptor.Descriptor):
- raise ValueError('Unable to export a descriptor CSV since %s is not a descriptor.' % type(desc).__name__)
- elif descriptor_type != type(desc):
- raise ValueError('To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s.' % (descriptor_type_label, type(desc)))
-
- writer.writerow(vars(desc))
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
deleted file mode 100644
index e75cdb7e..00000000
--- a/stem/descriptor/reader.py
+++ /dev/null
@@ -1,563 +0,0 @@
-# Copyright 2012-2020, Damian Johnson and The Tor Project
-# See LICENSE for licensing information
-
-"""
-Utilities for reading descriptors from local directories and archives. This is
-mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
-class, which is an iterator for the descriptor data in a series of
-destinations. For example...
-
-::
-
- my_descriptors = [
- '/tmp/server-descriptors-2012-03.tar.bz2',
- '/tmp/archived_descriptors/',
- ]
-
- # prints the contents of all the descriptor files
- with DescriptorReader(my_descriptors) as reader:
- for descriptor in reader:
-      print(descriptor)
-
-This ignores files that cannot be processed due to read errors or unparsable
-content. To be notified of skipped files you can register a listener with
-:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.
-
-The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
-modified timestamps for descriptor files that it has read so it can skip
-unchanged files if run again. This listing of processed files can also be
-persisted and applied to other
-:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
-following prints descriptors as they're changed over the course of a minute,
-and picks up where it left off if run again...
-
-::
-
- reader = DescriptorReader(['/tmp/descriptor_data'])
-
- try:
- processed_files = load_processed_files('/tmp/used_descriptors')
- reader.set_processed_files(processed_files)
- except: pass # could not load, maybe this is the first run
-
- start_time = time.time()
-
- while (time.time() - start_time) < 60:
- # prints any descriptors that have changed since last checked
- with reader:
- for descriptor in reader:
-        print(descriptor)
-
- time.sleep(1)
-
- save_processed_files('/tmp/used_descriptors', reader.get_processed_files())
-
-**Module Overview:**
-
-::
-
- load_processed_files - Loads a listing of processed files
- save_processed_files - Saves a listing of processed files
-
- DescriptorReader - Iterator for descriptor data on the local file system
- |- get_processed_files - provides the listing of files that we've processed
- |- set_processed_files - sets our tracking of the files we have processed
- |- register_read_listener - adds a listener for when files are read
- |- register_skip_listener - adds a listener that's notified of skipped files
- |- start - begins reading descriptor data
- |- stop - stops reading descriptor data
- |- __enter__ / __exit__ - manages the descriptor reader thread in the context
- +- __iter__ - iterates over descriptor data in unread files
-
- FileSkipped - Base exception for a file that was skipped
- |- AlreadyRead - We've already read a file with this last modified timestamp
- |- ParsingFailure - Contents can't be parsed as descriptor data
- |- UnrecognizedType - File extension indicates non-descriptor data
- +- ReadFailed - Wraps an error that was raised while reading the file
- +- FileMissing - File does not exist
-
-.. deprecated:: 1.8.0
-
- This module will likely be removed in Stem 2.0 due to lack of usage. If you
-  use this module please `let me know <https://www.atagar.com/contact/>`_.
-"""
-
-import mimetypes
-import os
-import queue
-import tarfile
-import threading
-
-import stem.descriptor
-import stem.prereq
-import stem.util
-import stem.util.str_tools
-import stem.util.system
-
-# flag to indicate when the reader thread is out of descriptor files to read
-FINISHED = 'DONE'
-
-
-class FileSkipped(Exception):
- "Base error when we can't provide descriptor data from a file."
-
-
-class AlreadyRead(FileSkipped):
- """
- Already read a file with this 'last modified' timestamp or later.
-
- :param int last_modified: unix timestamp for when the file was last modified
- :param int last_modified_when_read: unix timestamp for the modification time
- when we last read this file
- """
-
- def __init__(self, last_modified, last_modified_when_read):
- super(AlreadyRead, self).__init__('File has already been read since it was last modified. modification time: %s, last read: %s' % (last_modified, last_modified_when_read))
- self.last_modified = last_modified
- self.last_modified_when_read = last_modified_when_read
-
-
-class ParsingFailure(FileSkipped):
- """
- File contents could not be parsed as descriptor data.
-
- :param ValueError exception: issue that arose when parsing
- """
-
- def __init__(self, parsing_exception):
- super(ParsingFailure, self).__init__(parsing_exception)
- self.exception = parsing_exception
-
-
-class UnrecognizedType(FileSkipped):
- """
- File doesn't contain descriptor data. This could either be due to its file
- type or because it doesn't conform to a recognizable descriptor type.
-
- :param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
- """
-
- def __init__(self, mime_type):
- super(UnrecognizedType, self).__init__('Unrecognized mime type: %s (%s)' % mime_type)
- self.mime_type = mime_type
-
-
-class ReadFailed(FileSkipped):
- """
- An IOError occurred while trying to read the file.
-
- :param IOError exception: issue that arose when reading the file, **None** if
- this arose due to the file not being present
- """
-
- def __init__(self, read_exception):
- super(ReadFailed, self).__init__(read_exception)
- self.exception = read_exception
-
-
-class FileMissing(ReadFailed):
- 'File does not exist.'
-
- def __init__(self):
- super(FileMissing, self).__init__('File does not exist')
-
-
-def load_processed_files(path):
- """
- Loads a dictionary of 'path => last modified timestamp' mappings, as
- persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
- file.
-
- :param str path: location to load the processed files dictionary from
-
- :returns: **dict** of 'path (**str**) => last modified unix timestamp
- (**int**)' mappings
-
- :raises:
- * **IOError** if unable to read the file
- * **TypeError** if unable to parse the file's contents
- """
-
- processed_files = {}
-
- with open(path, 'rb') as input_file:
- for line in input_file.readlines():
- line = stem.util.str_tools._to_unicode(line.strip())
-
- if not line:
- continue # skip blank lines
-
- if ' ' not in line:
- raise TypeError('Malformed line: %s' % line)
-
- path, timestamp = line.rsplit(' ', 1)
-
- if not os.path.isabs(path):
- raise TypeError("'%s' is not an absolute path" % path)
- elif not timestamp.isdigit():
- raise TypeError("'%s' is not an integer timestamp" % timestamp)
-
- processed_files[path] = int(timestamp)
-
- return processed_files
-
-
-def save_processed_files(path, processed_files):
- """
- Persists a dictionary of 'path => last modified timestamp' mappings (as
- provided by the DescriptorReader's
- :func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
- so that they can be loaded later and applied to another
- :class:`~stem.descriptor.reader.DescriptorReader`.
-
- :param str path: location to save the processed files dictionary to
- :param dict processed_files: 'path => last modified' mappings
-
- :raises:
- * **IOError** if unable to write to the file
- * **TypeError** if processed_files is of the wrong type
- """
-
- # makes the parent directory if it doesn't already exist
-
- try:
- path_dir = os.path.dirname(path)
-
- if not os.path.exists(path_dir):
- os.makedirs(path_dir)
- except OSError as exc:
- raise IOError(exc)
-
- with open(path, 'w') as output_file:
- for path, timestamp in list(processed_files.items()):
- if not os.path.isabs(path):
- raise TypeError('Only absolute paths are acceptable: %s' % path)
-
- output_file.write('%s %i\n' % (path, timestamp))
-
-
-class DescriptorReader(object):
- """
- Iterator for the descriptor data on the local file system. This can process
- text files, tarball archives (gzip or bzip2), or recurse directories.
-
- By default this limits the number of descriptors that we'll read ahead before
- waiting for our caller to fetch some of them. This is included to avoid
- unbounded memory usage.
-
- Our persistence_path argument is a convenient method to persist the listing
- of files we have processed between runs, however it doesn't allow for error
- handling. If you want that then use the
- :func:`~stem.descriptor.reader.load_processed_files` and
- :func:`~stem.descriptor.reader.save_processed_files` functions instead.
-
- :param str,list target: path or list of paths for files or directories to be read from
- :param bool validate: checks the validity of the descriptor's content if
- **True**, skips these checks otherwise
- :param bool follow_links: determines if we'll follow symlinks when traversing
- directories
- :param int buffer_size: descriptors we'll buffer before waiting for some to
-    be read; this is unbounded if zero
- :param str persistence_path: if set we will load and save processed file
-    listings from this path; errors are ignored
- :param stem.descriptor.__init__.DocumentHandler document_handler: method in
- which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
- :param dict kwargs: additional arguments for the descriptor constructor
- """
-
- def __init__(self, target, validate = False, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
- self._targets = [target] if isinstance(target, (bytes, str)) else target
-
- # expand any relative paths we got
-
- self._targets = list(map(os.path.abspath, self._targets))
-
- self._validate = validate
- self._follow_links = follow_links
- self._persistence_path = persistence_path
- self._document_handler = document_handler
- self._kwargs = kwargs
- self._read_listeners = []
- self._skip_listeners = []
- self._processed_files = {}
-
- self._reader_thread = None
- self._reader_thread_lock = threading.RLock()
-
- self._iter_lock = threading.RLock()
- self._iter_notice = threading.Event()
-
- self._is_stopped = threading.Event()
- self._is_stopped.set()
-
- # Descriptors that we have read but not yet provided to the caller. A
- # FINISHED entry is used by the reading thread to indicate the end.
-
- self._unreturned_descriptors = queue.Queue(buffer_size)
-
- if self._persistence_path:
- try:
- processed_files = load_processed_files(self._persistence_path)
- self.set_processed_files(processed_files)
- except:
- pass
-
- def get_processed_files(self):
- """
-    For each file that we have read descriptor data from, this provides a
- mapping of the form...
-
- ::
-
- absolute path (str) => last modified unix timestamp (int)
-
- This includes entries set through the
- :func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
- method. Each run resets this to only the files that were present during
- that run.
-
- :returns: **dict** with the absolute paths and unix timestamp for the last
- modified times of the files we have processed
- """
-
- # make sure that we only provide back absolute paths
- return dict((os.path.abspath(k), v) for (k, v) in list(self._processed_files.items()))
-
- def set_processed_files(self, processed_files):
- """
- Sets the listing of the files we have processed. Most often this is used
- with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
- pre-populate the listing of descriptor files that we have seen.
-
- :param dict processed_files: mapping of absolute paths (**str**) to unix
- timestamps for the last modified time (**int**)
- """
-
- self._processed_files = dict(processed_files)
-
- def register_read_listener(self, listener):
- """
- Registers a listener for when files are read. This is executed prior to
- processing files. Listeners are expected to be of the form...
-
- ::
-
- my_listener(path)
-
- :param functor listener: functor to be notified when files are read
- """
-
- self._read_listeners.append(listener)
-
- def register_skip_listener(self, listener):
- """
- Registers a listener for files that are skipped. This listener is expected
- to be a functor of the form...
-
- ::
-
- my_listener(path, exception)
-
- :param functor listener: functor to be notified of files that are skipped
-    due to read errors or because they couldn't be parsed as valid descriptor data
- """
-
- self._skip_listeners.append(listener)
-
- def get_buffered_descriptor_count(self):
- """
- Provides the number of descriptors that are waiting to be iterated over.
- This is limited to the buffer_size that we were constructed with.
-
- :returns: **int** for the estimated number of currently enqueued
-    descriptors; this is not entirely reliable
- """
-
- return self._unreturned_descriptors.qsize()
-
- def start(self):
- """
- Starts reading our descriptor files.
-
- :raises: **ValueError** if we're already reading the descriptor files
- """
-
- with self._reader_thread_lock:
- if self._reader_thread:
- raise ValueError('Already running, you need to call stop() first')
- else:
- self._is_stopped.clear()
- self._reader_thread = threading.Thread(target = self._read_descriptor_files, name='Descriptor reader')
- self._reader_thread.setDaemon(True)
- self._reader_thread.start()
-
- def stop(self):
- """
- Stops further reading of descriptor files.
- """
-
- with self._reader_thread_lock:
- self._is_stopped.set()
- self._iter_notice.set()
-
- # clears our queue to unblock enqueue calls
-
- try:
- while True:
- self._unreturned_descriptors.get_nowait()
- except queue.Empty:
- pass
-
- self._reader_thread.join()
- self._reader_thread = None
-
- if self._persistence_path:
- try:
- processed_files = self.get_processed_files()
- save_processed_files(self._persistence_path, processed_files)
- except:
- pass
-
- def _read_descriptor_files(self):
- new_processed_files = {}
- remaining_files = list(self._targets)
-
- while remaining_files and not self._is_stopped.is_set():
- target = remaining_files.pop(0)
-
- if not os.path.exists(target):
- self._notify_skip_listeners(target, FileMissing())
- continue
-
- if os.path.isdir(target):
- walker = os.walk(target, followlinks = self._follow_links)
- self._handle_walker(walker, new_processed_files)
- else:
- self._handle_file(target, new_processed_files)
-
- self._processed_files = new_processed_files
-
- if not self._is_stopped.is_set():
- self._unreturned_descriptors.put(FINISHED)
-
- self._iter_notice.set()
-
- def __iter__(self):
- with self._iter_lock:
- while not self._is_stopped.is_set():
- try:
- descriptor = self._unreturned_descriptors.get_nowait()
-
- if descriptor == FINISHED:
- break
- else:
- yield descriptor
- except queue.Empty:
- self._iter_notice.wait()
- self._iter_notice.clear()
-
- def _handle_walker(self, walker, new_processed_files):
- for root, _, files in walker:
- for filename in files:
- self._handle_file(os.path.join(root, filename), new_processed_files)
-
- # this can take a while if, say, we're including the root directory
- if self._is_stopped.is_set():
- return
-
- def _handle_file(self, target, new_processed_files):
- # This is a file. Register its last modified timestamp and check if
- # it's a file that we should skip.
-
- try:
- last_modified = int(os.stat(target).st_mtime)
- last_used = self._processed_files.get(target)
- new_processed_files[target] = last_modified
- except OSError as exc:
- self._notify_skip_listeners(target, ReadFailed(exc))
- return
-
- if last_used and last_used >= last_modified:
- self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
- return
-
- # Block devices and such are never descriptors, and can cause us to block
-    # for quite a while, so we skip anything that isn't a regular file.
-
- if not os.path.isfile(target):
- return
-
- # The mimetypes module only checks the file extension. To actually
- # check the content (like the 'file' command) we'd need something like
- # pymagic (https://github.com/cloudburst/pymagic).
-
- target_type = mimetypes.guess_type(target)
-
- if target_type[0] in (None, 'text/plain'):
- # either '.txt' or an unknown type
- self._handle_descriptor_file(target, target_type)
- elif stem.util.system.is_tarfile(target):
- # handles gzip, bz2, and decompressed tarballs among others
- self._handle_archive(target)
- else:
- self._notify_skip_listeners(target, UnrecognizedType(target_type))
-
- def _handle_descriptor_file(self, target, mime_type):
- try:
- self._notify_read_listeners(target)
-
- with open(target, 'rb') as target_file:
- for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
- if self._is_stopped.is_set():
- return
-
- self._unreturned_descriptors.put(desc)
- self._iter_notice.set()
- except TypeError:
- self._notify_skip_listeners(target, UnrecognizedType(mime_type))
- except ValueError as exc:
- self._notify_skip_listeners(target, ParsingFailure(exc))
- except IOError as exc:
- self._notify_skip_listeners(target, ReadFailed(exc))
-
- def _handle_archive(self, target):
- try:
- with tarfile.open(target) as tar_file:
- self._notify_read_listeners(target)
-
- for tar_entry in tar_file:
- if tar_entry.isfile():
- entry = tar_file.extractfile(tar_entry)
-
- try:
- for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
- if self._is_stopped.is_set():
- return
-
- desc._set_path(os.path.abspath(target))
- desc._set_archive_path(tar_entry.name)
- self._unreturned_descriptors.put(desc)
- self._iter_notice.set()
- except TypeError as exc:
- self._notify_skip_listeners(target, ParsingFailure(exc))
- except ValueError as exc:
- self._notify_skip_listeners(target, ParsingFailure(exc))
- finally:
- entry.close()
- except IOError as exc:
- self._notify_skip_listeners(target, ReadFailed(exc))
-
- def _notify_read_listeners(self, path):
- for listener in self._read_listeners:
- listener(path)
-
- def _notify_skip_listeners(self, path, exception):
- for listener in self._skip_listeners:
- listener(path, exception)
-
- def __enter__(self):
- self.start()
- return self
-
- def __exit__(self, exit_type, value, traceback):
- self.stop()
diff --git a/test/integ/control/controller.py b/test/integ/control/controller.py
index 257d9fbc..6903c65b 100644
--- a/test/integ/control/controller.py
+++ b/test/integ/control/controller.py
@@ -12,7 +12,6 @@ import unittest
import stem.connection
import stem.control
-import stem.descriptor.reader
import stem.descriptor.router_status_entry
import stem.directory
import stem.response.protocolinfo
diff --git a/test/settings.cfg b/test/settings.cfg
index 1ec8176e..d22bec42 100644
--- a/test/settings.cfg
+++ b/test/settings.cfg
@@ -229,7 +229,6 @@ pyflakes.ignore stem/util/__init__.py => undefined name 'unicode'
pyflakes.ignore stem/util/conf.py => undefined name 'unicode'
pyflakes.ignore stem/util/test_tools.py => 'pyflakes' imported but unused
pyflakes.ignore stem/util/test_tools.py => 'pycodestyle' imported but unused
-pyflakes.ignore test/unit/descriptor/reader.py => 'bz2' imported but unused
pyflakes.ignore test/unit/response/events.py => 'from stem import *' used; unable to detect undefined names
pyflakes.ignore test/unit/response/events.py => *may be undefined, or defined from star imports: stem
pyflakes.ignore stem/util/str_tools.py => undefined name 'unicode'
@@ -254,8 +253,6 @@ test.unit_tests
|test.unit.installation.TestInstallation
|test.unit.descriptor.descriptor.TestDescriptor
|test.unit.descriptor.compression.TestCompression
-|test.unit.descriptor.export.TestExport
-|test.unit.descriptor.reader.TestDescriptorReader
|test.unit.descriptor.collector.TestCollector
|test.unit.descriptor.remote.TestDescriptorDownloader
|test.unit.descriptor.server_descriptor.TestServerDescriptor
diff --git a/test/unit/descriptor/export.py b/test/unit/descriptor/export.py
deleted file mode 100644
index d27ed241..00000000
--- a/test/unit/descriptor/export.py
+++ /dev/null
@@ -1,94 +0,0 @@
-"""
-Unit tests for stem.descriptor.export.
-"""
-
-import io
-import unittest
-
-import stem.prereq
-
-from stem.descriptor.server_descriptor import RelayDescriptor, BridgeDescriptor
-from stem.descriptor.export import export_csv, export_csv_file
-
-
-class TestExport(unittest.TestCase):
- def test_minimal_descriptor(self):
- """
- Exports a single minimal tor server descriptor.
- """
-
- desc = RelayDescriptor.create({
- 'router': 'caerSidi 71.35.133.197 9001 0 0',
- 'published': '2012-03-01 17:15:27',
- })
-
- desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = False)
- expected = 'caerSidi,71.35.133.197,2012-03-01 17:15:27\n'
- self.assertEqual(expected, desc_csv)
-
- desc_csv = export_csv(desc, included_fields = ('nickname', 'address', 'published'), header = True)
- expected = 'nickname,address,published\n' + expected
- self.assertEqual(expected, desc_csv)
-
- def test_multiple_descriptors(self):
- """
- Exports multiple descriptors, making sure that we get them back in the same
- order.
- """
-
- nicknames = ('relay1', 'relay3', 'relay2', 'caerSidi', 'zeus')
- descriptors = []
-
- for nickname in nicknames:
- router_line = '%s 71.35.133.197 9001 0 0' % nickname
- descriptors.append(RelayDescriptor.create({'router': router_line}))
-
- expected = '\n'.join(nicknames) + '\n'
- self.assertEqual(expected, export_csv(descriptors, included_fields = ('nickname',), header = False))
-
- def test_file_output(self):
- """
- Basic test for the export_csv_file() function, checking that it provides
- the same output as export_csv().
- """
-
- desc = RelayDescriptor.create()
- desc_csv = export_csv(desc)
-
- csv_buffer = io.StringIO()
- export_csv_file(csv_buffer, desc)
-
- self.assertEqual(desc_csv, csv_buffer.getvalue())
-
- def test_excludes_private_attr(self):
- """
- Checks that the default attributes for our csv output doesn't include private fields.
- """
-
- desc = RelayDescriptor.create()
- desc_csv = export_csv(desc)
-
- self.assertTrue(',signature' in desc_csv)
- self.assertFalse(',_digest' in desc_csv)
- self.assertFalse(',_annotation_lines' in desc_csv)
-
- def test_empty_input(self):
- """
- Exercises when we don't provide any descriptors.
- """
- self.assertEqual('', export_csv([]))
-
- def test_invalid_attributes(self):
- """
- Attempts to make a csv with attributes that don't exist.
- """
-
- desc = RelayDescriptor.create()
- self.assertRaises(ValueError, export_csv, desc, ('nickname', 'blarg!'))
-
- def test_multiple_descriptor_types(self):
- """
- Attempts to make a csv with multiple descriptor types.
- """
-
- self.assertRaises(ValueError, export_csv, (RelayDescriptor.create(), BridgeDescriptor.create()))
diff --git a/test/unit/descriptor/reader.py b/test/unit/descriptor/reader.py
deleted file mode 100644
index f49183e5..00000000
--- a/test/unit/descriptor/reader.py
+++ /dev/null
@@ -1,625 +0,0 @@
-"""
-Unit tests for stem.descriptor.reader.
-"""
-
-import getpass
-import io
-import os
-import shutil
-import signal
-import sys
-import tarfile
-import tempfile
-import time
-import unittest
-
-import stem.descriptor.reader
-import stem.util.str_tools
-import stem.util.system
-
-import test.unit.descriptor
-
-from unittest.mock import patch
-
-BASIC_LISTING = """
-/tmp 123
-/bin/grep 4567
-/file with spaces/and \\ stuff 890
-"""
-
-my_dir = os.path.dirname(__file__)
-DESCRIPTOR_TEST_DATA = os.path.join(my_dir, 'data')
-
-TAR_DESCRIPTORS = None
-
-
-def _get_raw_tar_descriptors():
- global TAR_DESCRIPTORS
-
- if not TAR_DESCRIPTORS:
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar')
- raw_descriptors = []
-
- with tarfile.open(test_path) as tar_file:
- for tar_entry in tar_file:
- if tar_entry.isfile():
- entry = tar_file.extractfile(tar_entry)
- entry.readline() # strip header
- raw_descriptors.append(entry.read().decode('utf-8', 'replace'))
- entry.close()
-
- TAR_DESCRIPTORS = raw_descriptors
-
- return TAR_DESCRIPTORS
-
-
-class SkipListener:
- def __init__(self):
- self.results = [] # (path, exception) tuples that we've received
-
- def listener(self, path, exception):
- self.results.append((path, exception))
-
-
-class TestDescriptorReader(unittest.TestCase):
- def setUp(self):
- self.temp_directory = tempfile.mkdtemp()
- self.test_listing_path = os.path.join(self.temp_directory, 'descriptor_processed_files')
-
- def tearDown(self):
- shutil.rmtree(self.temp_directory)
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files(self, open_mock):
- """
- Successful load of content.
- """
-
- test_lines = (
- '/dir/ 0',
- '/dir/file 12345',
- '/dir/file with spaces 7138743',
- ' /dir/with extra space 12345 ',
- ' \t ',
- '',
- '/dir/after empty line 12345',
- )
-
- expected_value = {
- '/dir/': 0,
- '/dir/file': 12345,
- '/dir/file with spaces': 7138743,
- '/dir/with extra space': 12345,
- '/dir/after empty line': 12345,
- }
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('\n'.join(test_lines)))
- self.assertEqual(expected_value, stem.descriptor.reader.load_processed_files(''))
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files_empty(self, open_mock):
- """
- Tests the load_processed_files() function with an empty file.
- """
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes(''))
- self.assertEqual({}, stem.descriptor.reader.load_processed_files(''))
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files_no_file(self, open_mock):
- """
-    Tests the load_processed_files() function with content that is malformed
-    because it is missing the file path.
- """
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes(' 12345'))
- self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '')
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files_no_timestamp(self, open_mock):
- """
-    Tests the load_processed_files() function with content that is malformed
-    because it is missing the timestamp.
- """
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('/dir/file '))
- self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '')
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files_malformed_file(self, open_mock):
- """
-    Tests the load_processed_files() function with content that is malformed
-    because it has an invalid file path.
- """
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('not_an_absolute_file 12345'))
- self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '')
-
- @patch('stem.descriptor.reader.open', create = True)
- def test_load_processed_files_malformed_timestamp(self, open_mock):
- """
-    Tests the load_processed_files() function with content that is malformed
-    because it has a non-numeric timestamp.
- """
-
- open_mock.return_value = io.BytesIO(stem.util.str_tools._to_bytes('/dir/file 123a'))
- self.assertRaises(TypeError, stem.descriptor.reader.load_processed_files, '')
-
- def test_load_processed_files_from_data(self):
- """
- Basic sanity test for loading a processed files listing from disk.
- """
-
- test_listing_path = self._make_processed_files_listing(BASIC_LISTING)
- loaded_listing = stem.descriptor.reader.load_processed_files(test_listing_path)
-
- expected_listing = {
- '/tmp': 123,
- '/bin/grep': 4567,
- '/file with spaces/and \\ stuff': 890,
- }
-
- self.assertEqual(expected_listing, loaded_listing)
-
- def test_load_processed_files_missing(self):
- """
- Tests the load_processed_files() function with a file that doesn't exist.
- """
-
- self.assertRaises(IOError, stem.descriptor.reader.load_processed_files, '/non-existant/path')
-
- def test_load_processed_files_permissions(self):
- """
- Tests the load_processed_files() function with a file that can't be read
- due to permissions.
- """
-
- # test relies on being unable to read a file
-
- if getpass.getuser() == 'root':
- self.skipTest('(running as root)')
-
- # Skip the test on windows, since you can only set the file's
- # read-only flag with os.chmod(). For more information see...
- # http://docs.python.org/library/os.html#os.chmod
-
- if stem.util.system.is_windows():
- self.skipTest('(chmod not functional)')
-
- test_listing_path = self._make_processed_files_listing(BASIC_LISTING)
- os.chmod(test_listing_path, 0o077) # remove read permissions
- self.assertRaises(IOError, stem.descriptor.reader.load_processed_files, test_listing_path)
-
- def test_save_processed_files(self):
- """
- Basic sanity test for persisting files listings to disk.
- """
-
- initial_listing = {
- '/tmp': 123,
- '/bin/grep': 4567,
- '/file with spaces/and \\ stuff': 890,
- }
-
- # saves the initial_listing to a file then reloads it
-
- stem.descriptor.reader.save_processed_files(self.test_listing_path, initial_listing)
- loaded_listing = stem.descriptor.reader.load_processed_files(self.test_listing_path)
-
- self.assertEqual(initial_listing, loaded_listing)
-
- def test_save_processed_files_malformed(self):
- """
- Tests the save_processed_files() function with malformed data.
- """
-
- missing_filename = {'': 123}
- relative_filename = {'foobar': 123}
- string_timestamp = {'/tmp': '123a'}
- temp_path = tempfile.mkstemp(prefix = 'stem-unit-tests-', text = True)[1]
-
- for listing in (missing_filename, relative_filename, string_timestamp):
- self.assertRaises(TypeError, stem.descriptor.reader.save_processed_files, temp_path, listing)
-
- # Though our attempts to save the processed files fail we'll write an empty
- # file. Cleaning it up.
-
- try:
- os.remove(temp_path)
- except:
- pass
-
- def test_basic_example(self):
- """
- Exercises something similar to the first example in the header
- documentation, checking that some of the contents match what we'd expect.
- """
-
- # snag some of the plaintext descriptors so we can later make sure that we
- # iterate over them
-
- descriptor_entries = []
-
- descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor')
-
- with open(descriptor_path) as descriptor_file:
- descriptor_file.readline() # strip header
- descriptor_entries.append(descriptor_file.read())
-
- # running this test multiple times to flush out concurrency issues
-
- for _ in range(15):
- remaining_entries = list(descriptor_entries)
-
- with stem.descriptor.reader.DescriptorReader(descriptor_path) as reader:
- for descriptor in reader:
- descriptor_str = str(descriptor)
-
- if descriptor_str in remaining_entries:
- remaining_entries.remove(descriptor_str)
- else:
- # iterator is providing output that we didn't expect
- self.fail()
-
- # check that we've seen all of the descriptor_entries
- self.assertTrue(len(remaining_entries) == 0)
-
- def test_multiple_runs(self):
- """
- Runs a DescriptorReader instance multiple times over the same content,
- making sure that it can be used repeatedly.
- """
-
- descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor')
- reader = stem.descriptor.reader.DescriptorReader(descriptor_path)
-
- with reader:
- self.assertEqual(1, len(list(reader)))
-
- # run it a second time, this shouldn't provide any descriptors because we
- # have already read it
-
- with reader:
- self.assertEqual(0, len(list(reader)))
-
- # clear the DescriptorReader's memory of seeing the file and run it again
-
- reader.set_processed_files([])
-
- with reader:
- self.assertEqual(1, len(list(reader)))
-
- def test_buffer_size(self):
- """
- Checks that we can process sets of descriptors larger than our buffer size,
- that we don't exceed it, and that we can still stop midway through reading
- them.
- """
-
- reader = stem.descriptor.reader.DescriptorReader(DESCRIPTOR_TEST_DATA, buffer_size = 2)
-
- with reader:
- self.assertTrue(reader.get_buffered_descriptor_count() <= 2)
- time.sleep(0.001)
- self.assertTrue(reader.get_buffered_descriptor_count() <= 2)
-
- def test_persistence_path(self):
- """
-    Check that the persistence_path argument loads and saves a processed
- files listing.
- """
-
- descriptor_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor')
-
- # First run where the persistence_path doesn't yet exist. This just tests
- # the saving functionality.
-
- reader = stem.descriptor.reader.DescriptorReader(descriptor_path, persistence_path = self.test_listing_path)
-
- with reader:
- self.assertEqual(1, len(list(reader)))
-
- # check that we've saved reading example_descriptor
- self.assertTrue(os.path.exists(self.test_listing_path))
-
- with open(self.test_listing_path) as persistence_file:
-      persistence_file_contents = persistence_file.read()
-      self.assertTrue(persistence_file_contents.startswith(descriptor_path))
-
-    # Try running again with a new reader but the same persistence path. If it
-    # loads and takes the persistence_path into account then it won't re-read
-    # the descriptor file. This in essence just tests its loading functionality.
-
- reader = stem.descriptor.reader.DescriptorReader(descriptor_path, persistence_path = self.test_listing_path)
-
- with reader:
- self.assertEqual(0, len(list(reader)))
-
- def test_archived_paths(self):
- """
- Checks the get_path() and get_archive_path() for a tarball.
- """
-
- expected_archive_paths = (
- 'descriptor_archive/0/2/02c311d3d789f3f55c0880b5c85f3c196343552c',
- 'descriptor_archive/1/b/1bb798cae15e21479db0bc700767eee4733e9d4a',
- 'descriptor_archive/1/b/1ef75fef564180d8b3f72c6f8635ff0cd855f92c',
- )
-
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar')
-
- with stem.descriptor.reader.DescriptorReader(test_path) as reader:
- for desc in reader:
- self.assertEqual(test_path, desc.get_path())
- self.assertTrue(desc.get_archive_path() in expected_archive_paths)
-
- def test_archived_uncompressed(self):
- """
- Checks that we can read descriptors from an uncompressed archive.
- """
-
- expected_results = _get_raw_tar_descriptors()
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar')
-
- with stem.descriptor.reader.DescriptorReader(test_path) as reader:
- read_descriptors = [str(desc) for desc in list(reader)]
- self.assertEqual(expected_results, read_descriptors)
-
- def test_archived_gzip(self):
- """
- Checks that we can read descriptors from a gzipped archive.
- """
-
- expected_results = _get_raw_tar_descriptors()
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar.gz')
-
- with stem.descriptor.reader.DescriptorReader(test_path) as reader:
- read_descriptors = [str(desc) for desc in list(reader)]
- self.assertEqual(expected_results, read_descriptors)
-
- def test_archived_bz2(self):
- """
-    Checks that we can read descriptors from a bzipped archive.
- """
-
- # when python's compiled it only optionally has bz2 support
-
- try:
- import bz2
- except ImportError:
-      self.skipTest('(bz2 unsupported)')
-
- expected_results = _get_raw_tar_descriptors()
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'descriptor_archive.tar.bz2')
-
- with stem.descriptor.reader.DescriptorReader(test_path) as reader:
- read_descriptors = [str(desc) for desc in list(reader)]
- self.assertEqual(expected_results, read_descriptors)
-
- def test_stop(self):
- """
- Runs a DescriptorReader over the root directory, then checks that calling
- stop() makes it terminate in a timely fashion.
- """
-
- # Skip on windows since SIGALRM is unavailable
-
- if stem.util.system.is_windows():
- self.skipTest('(SIGALRM unavailable)')
-
- is_test_running = True
- reader = stem.descriptor.reader.DescriptorReader('/usr')
-
- # Fails the test after a couple seconds if we don't finish successfully.
- # Depending on what we're blocked on this might not work when the test
- # fails, requiring that we give a manual kill to the test.
-
- def timeout_handler(signum, frame):
- if is_test_running:
- self.fail()
-
- signal.signal(signal.SIGALRM, timeout_handler)
- signal.alarm(2)
-
- reader.start()
- time.sleep(0.001)
- reader.stop()
- is_test_running = False
-
- def test_get_processed_files(self):
- """
- Checks that get_processed_files() provides the expected results after
- iterating over our test data.
- """
-
- desc_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor')
- last_modified = int(os.stat(desc_path).st_mtime)
-
- reader = stem.descriptor.reader.DescriptorReader(desc_path)
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual({desc_path: last_modified}, reader.get_processed_files())
-
- def test_skip_nondescriptor_contents(self):
- """
- Checks that the reader properly reports when it skips both binary and
- plaintext non-descriptor files.
- """
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(os.path.join(DESCRIPTOR_TEST_DATA, 'unparseable'))
- reader.register_skip_listener(skip_listener.listener)
-
- expected_skip_files = ('riddle', 'tiny.png', 'vote', 'new_metrics_type', 'cached-microdesc-consensus_with_carriage_returns', 'extrainfo_nonascii_v3_reqs')
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- # strip anything with a .swp suffix (vim tmp files)
-
- skip_listener.results = [(path, exc) for (path, exc) in skip_listener.results if not path.endswith('.swp')]
-
- if len(skip_listener.results) != len(expected_skip_files):
- expected_label = ',\n '.join(expected_skip_files)
- results_label = ',\n '.join(['%s (%s)' % (path, exc) for (path, exc) in skip_listener.results])
-
- self.fail('Skipped files that we should have been able to parse.\n\nExpected:\n %s\n\nResult:\n %s' % (expected_label, results_label))
-
- for skip_path, skip_exception in skip_listener.results:
- if not os.path.basename(skip_path) in expected_skip_files:
- self.fail('Unexpected non-descriptor content: %s' % skip_path)
-
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType))
-
- def test_skip_listener_already_read(self):
- """
- Checks that calling set_processed_files() prior to reading makes us skip
- those files. This also doubles for testing that skip listeners are notified
- of files that we've already read.
- """
-
- # path that we want the DescriptorReader to skip
-
- test_path = os.path.join(DESCRIPTOR_TEST_DATA, 'example_descriptor')
- initial_processed_files = {test_path: sys.maxsize}
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(test_path)
- reader.register_skip_listener(skip_listener.listener)
- reader.set_processed_files(initial_processed_files)
-
- self.assertEqual(initial_processed_files, reader.get_processed_files())
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual(1, len(skip_listener.results))
-
- skipped_path, skip_exception = skip_listener.results[0]
- self.assertEqual(test_path, skipped_path)
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.AlreadyRead))
- self.assertEqual(sys.maxsize, skip_exception.last_modified_when_read)
-
- def test_skip_listener_unrecognized_type(self):
- """
- Listens for a file that's skipped because its file type isn't recognized.
- """
-
-    # types are solely based on file extensions, so we make something that
-    # looks like a png image
-
- test_path = os.path.join(self.temp_directory, 'test.png')
-
- try:
- test_file = open(test_path, 'w')
- test_file.write('test data for test_skip_listener_unrecognized_type()')
- test_file.close()
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(test_path)
- reader.register_skip_listener(skip_listener.listener)
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual(1, len(skip_listener.results))
-
- skipped_path, skip_exception = skip_listener.results[0]
- self.assertEqual(test_path, skipped_path)
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType))
- self.assertTrue(skip_exception.mime_type in (('image/png', None), ('image/x-png', None)))
- finally:
- if os.path.exists(test_path):
- os.remove(test_path)
-
- def test_skip_listener_read_failure(self):
- """
- Listens for a file that's skipped because we lack read permissions.
- """
-
- # test relies on being unable to read a file
-
- if getpass.getuser() == 'root':
- self.skipTest('(running as root)')
- elif stem.util.system.is_windows():
- self.skipTest('(chmod not functional)')
-
- test_path = os.path.join(self.temp_directory, 'secret_file')
-
- try:
- test_file = open(test_path, 'w')
-      test_file.write('test data for test_skip_listener_read_failure()')
- test_file.close()
-
- os.chmod(test_path, 0o077) # remove read permissions
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(test_path)
- reader.register_skip_listener(skip_listener.listener)
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual(1, len(skip_listener.results))
-
- skipped_path, skip_exception = skip_listener.results[0]
- self.assertEqual(test_path, skipped_path)
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.ReadFailed))
- self.assertTrue(isinstance(skip_exception.exception, IOError))
- finally:
- if os.path.exists(test_path):
- os.remove(test_path)
-
- def test_skip_listener_file_missing(self):
- """
- Listens for a file that's skipped because the file doesn't exist.
- """
-
- test_path = '/non-existant/path'
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(test_path)
- reader.register_skip_listener(skip_listener.listener)
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual(1, len(skip_listener.results))
-
- skipped_path, skip_exception = skip_listener.results[0]
- self.assertEqual(test_path, skipped_path)
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.FileMissing))
-
- def test_unrecognized_metrics_type(self):
- """
- Parses a file that has a valid metrics header, but an unrecognized type.
- """
-
- test_path = test.unit.descriptor.get_resource('unparseable/new_metrics_type')
-
- skip_listener = SkipListener()
- reader = stem.descriptor.reader.DescriptorReader(test_path)
- reader.register_skip_listener(skip_listener.listener)
-
- with reader:
- list(reader) # iterates over all of the descriptors
-
- self.assertEqual(1, len(skip_listener.results))
-
- skipped_path, skip_exception = skip_listener.results[0]
- self.assertEqual(test_path, skipped_path)
- self.assertTrue(isinstance(skip_exception, stem.descriptor.reader.UnrecognizedType))
- self.assertEqual((None, None), skip_exception.mime_type)
-
- def _make_processed_files_listing(self, contents):
- """
- Writes the given 'processed file' listing to disk, returning the path where
- it is located.
- """
-
- with open(self.test_listing_path, 'w') as test_listing_file:
- test_listing_file.write(contents)
-
- return self.test_listing_path