[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [stem/master] Adding tarfile support to stem.descriptor.parse_file()
commit a5596873fd544d79c53f0c0123caa89bdb5a9f72
Author: Damian Johnson <atagar@xxxxxxxxxxxxxx>
Date: Sat May 31 14:14:06 2014 -0700
Adding tarfile support to stem.descriptor.parse_file()
A while back Karsten tried to hand a tarfile to our parse_file() function and
had confusing results...
https://trac.torproject.org/projects/tor/ticket/10977
Expanding our parse_file() function so it'll happily handle tarfiles and tar
paths.
Note that the DescriptorReader, which already had tar support, is keeping its
own separate implementation. This is because using parse_file()'s tar support
would have a couple of drawbacks...
1. The reader then couldn't stop in the middle of handling tarballs.
2. If a tarball contains both descriptor and non-descriptor content then the
DescriptorReader can handle that. parse_file(), however, raises an
exception.
---
docs/change_log.rst | 2 +
stem/descriptor/__init__.py | 55 +++++++++++++++++++++++++---
stem/descriptor/reader.py | 24 +++---------
stem/util/system.py | 34 ++++++++++++++++-
test/integ/descriptor/server_descriptor.py | 31 ++++++++++++++++
5 files changed, 121 insertions(+), 25 deletions(-)
diff --git a/docs/change_log.rst b/docs/change_log.rst
index 279f2bb..7f2982b 100644
--- a/docs/change_log.rst
+++ b/docs/change_log.rst
@@ -59,6 +59,7 @@ The following are only available within Stem's `git repository
* **Descriptors**
+ * Added tarfile support to :func:`~stem.descriptor.__init__.parse_file` (:trac:`10977`)
* Added microdescriptor's new identity and identity_type attributes (:spec:`22cda72`)
* **Utilities**
@@ -116,6 +117,7 @@ and a myriad of smaller improvements and fixes.
* Added :func:`stem.util.system.get_user`
* Added :func:`stem.util.system.get_start_time`
* Added :func:`stem.util.system.get_bsd_jail_path`
+ * Added :func:`stem.util.system.is_tarfile`
* Added :func:`stem.util.connection.is_private_address`
* **Website**
diff --git a/stem/descriptor/__init__.py b/stem/descriptor/__init__.py
index 2d7cc69..4270cb9 100644
--- a/stem/descriptor/__init__.py
+++ b/stem/descriptor/__init__.py
@@ -52,10 +52,12 @@ __all__ = [
import os
import re
+import tarfile
import stem.prereq
import stem.util.enum
import stem.util.str_tools
+import stem.util.system
try:
# added in python 2.7
@@ -127,7 +129,7 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
my_descriptor_file = open(descriptor_path, 'rb')
- :param str,file descriptor_file: path or opened file with the descriptor contents
+ :param str,file,tarfile descriptor_file: path or opened file with the descriptor contents
:param str descriptor_type: `descriptor type <https://metrics.torproject.org/formats.html#descriptortypes>`_, this is guessed if not provided
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
@@ -143,14 +145,23 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
* **IOError** if unable to read from the descriptor_file
"""
- # if we got a path then open that file for parsing
+ # Delegate to a helper if this is a path or tarfile.
+
+ handler = None
if isinstance(descriptor_file, (bytes, unicode)):
- with open(descriptor_file) as desc_file:
- for desc in parse_file(desc_file, descriptor_type, validate, document_handler, **kwargs):
- yield desc
+ if stem.util.system.is_tarfile(descriptor_file):
+ handler = _parse_file_for_tar_path
+ else:
+ handler = _parse_file_for_path
+ elif isinstance(descriptor_file, tarfile.TarFile):
+ handler = _parse_file_for_tarfile
+
+ if handler:
+ for desc in handler(descriptor_file, descriptor_type, validate, document_handler, **kwargs):
+ yield desc
- return
+ return
# The tor descriptor specifications do not provide a reliable method for
# identifying a descriptor file's type and version so we need to guess
@@ -210,6 +221,38 @@ def parse_file(descriptor_file, descriptor_type = None, validate = True, documen
raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
+def _parse_file_for_path(descriptor_file, *args, **kwargs):
+ with open(descriptor_file, 'rb') as desc_file:
+ for desc in parse_file(desc_file, *args, **kwargs):
+ yield desc
+
+
+def _parse_file_for_tar_path(descriptor_file, *args, **kwargs):
+ # TODO: use 'with' for tarfile after dropping python 2.6 support
+ tar_file = tarfile.open(descriptor_file)
+
+ try:
+ for desc in parse_file(tar_file, *args, **kwargs):
+ desc._set_path(os.path.abspath(descriptor_file))
+ yield desc
+ finally:
+ if tar_file:
+ tar_file.close()
+
+
+def _parse_file_for_tarfile(descriptor_file, *args, **kwargs):
+ for tar_entry in descriptor_file:
+ if tar_entry.isfile():
+ entry = descriptor_file.extractfile(tar_entry)
+
+ try:
+ for desc in parse_file(entry, *args, **kwargs):
+ desc._set_archive_path(entry.name)
+ yield desc
+ finally:
+ entry.close()
+
+
def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs):
# Parses descriptor files from metrics, yielding individual descriptors. This
# throws a TypeError if the descriptor_type or version isn't recognized.
diff --git a/stem/descriptor/reader.py b/stem/descriptor/reader.py
index 3fb4166..05c7533 100644
--- a/stem/descriptor/reader.py
+++ b/stem/descriptor/reader.py
@@ -85,6 +85,7 @@ import threading
import stem.descriptor
import stem.prereq
+import stem.util.system
# flag to indicate when the reader thread is out of descriptor files to read
FINISHED = 'DONE'
@@ -487,24 +488,10 @@ class DescriptorReader(object):
target_type = mimetypes.guess_type(target)
- # Checking if it's a tar file may fail due to permissions so failing back
- # to the mime type...
- #
- # IOError: [Errno 13] Permission denied: '/vmlinuz.old'
- #
- # With python 3 insuffient permissions raises an AttributeError instead...
- #
- # http://bugs.python.org/issue17059
-
- try:
- is_tar = tarfile.is_tarfile(target)
- except (IOError, AttributeError):
- is_tar = target_type[0] == 'application/x-tar'
-
if target_type[0] in (None, 'text/plain'):
# either '.txt' or an unknown type
self._handle_descriptor_file(target, target_type)
- elif is_tar:
+ elif stem.util.system.is_tarfile(target):
# handles gzip, bz2, and decompressed tarballs among others
self._handle_archive(target)
else:
@@ -529,9 +516,10 @@ class DescriptorReader(object):
self._notify_skip_listeners(target, ReadFailed(exc))
def _handle_archive(self, target):
- # TODO: This would be nicer via the 'with' keyword, but tarfile's __exit__
- # method was added sometime after python 2.5. We should change this when
- # we drop python 2.5 support.
+ # TODO: When dropping python 2.6 support go back to using 'with' for
+ # tarfiles...
+ #
+ # http://bugs.python.org/issue7232
tar_file = None
diff --git a/stem/util/system.py b/stem/util/system.py
index 89317df..d24d34b 100644
--- a/stem/util/system.py
+++ b/stem/util/system.py
@@ -16,6 +16,8 @@ best-effort, providing **None** if the lookup fails.
is_available - determines if a command is available on this system
is_running - determines if a given process is running
+ call - runs the given system command and provides back the results
+
get_name_by_pid - gets the name for a process by the given pid
get_pid_by_name - gets the pid for a process by the given name
get_pid_by_port - gets the pid for a process listening to a given port
@@ -25,9 +27,11 @@ best-effort, providing **None** if the lookup fails.
get_start_time - provides the unix timestamp when the process started
get_bsd_jail_id - provides the BSD jail id a given process is running within
get_bsd_jail_path - provides the path of the given BSD jail
+
+ is_tarfile - checks if the given path is a tarball
expand_path - expands relative paths and ~ entries
files_with_suffix - provides files with the given suffix
- call - runs the given system command and provides back the results
+
get_process_name - provides our process' name
set_process_name - changes our process' name
@@ -35,9 +39,11 @@ best-effort, providing **None** if the lookup fails.
import ctypes
import ctypes.util
+import mimetypes
import os
import platform
import subprocess
+import tarfile
import time
import stem.util.proc
@@ -763,6 +769,32 @@ def get_bsd_jail_path(jid):
return None
+def is_tarfile(path):
+ """
+ Returns if the path belongs to a tarfile or not.
+
+ .. versionadded:: 1.2.0
+
+ :param str path: path to be checked
+
+ :returns: **True** if the path belongs to a tarball, **False** otherwise
+ """
+
+ # Checking if it's a tar file may fail due to permissions so failing back
+ # to the mime type...
+ #
+ # IOError: [Errno 13] Permission denied: '/vmlinuz.old'
+ #
+ # With python 3 insufficient permissions raise an AttributeError instead...
+ #
+ # http://bugs.python.org/issue17059
+
+ try:
+ return tarfile.is_tarfile(path)
+ except (IOError, AttributeError):
+ return mimetypes.guess_type(path)[0] == 'application/x-tar'
+
+
def expand_path(path, cwd = None):
"""
Provides an absolute path, expanding tildes with the user's home and
diff --git a/test/integ/descriptor/server_descriptor.py b/test/integ/descriptor/server_descriptor.py
index 7b4645c..6d16add 100644
--- a/test/integ/descriptor/server_descriptor.py
+++ b/test/integ/descriptor/server_descriptor.py
@@ -4,6 +4,7 @@ Integration tests for stem.descriptor.server_descriptor.
import datetime
import os
+import tarfile
import unittest
import stem.control
@@ -15,8 +16,38 @@ import test.runner
from test.integ.descriptor import get_resource
+TARFILE_PATH = os.path.join(os.path.dirname(__file__), 'data', 'descriptor_archive.tar')
+TARFILE_FINGERPRINTS = set([
+ u'B6D83EC2D9E18B0A7A33428F8CFA9C536769E209',
+ u'E0BD57A11F00041A9789577C53A1B784473669E4',
+ u'1F43EE37A0670301AD9CB555D94AFEC2C89FDE86',
+])
+
class TestServerDescriptor(unittest.TestCase):
+ def test_with_tarfile_path(self):
+ """
+ Fetch server descriptors via parse_file() for a tarfile path.
+ """
+
+ descriptors = list(stem.descriptor.parse_file(TARFILE_PATH))
+ self.assertEqual(3, len(descriptors))
+
+ fingerprints = set([desc.fingerprint for desc in descriptors])
+ self.assertEqual(TARFILE_FINGERPRINTS, fingerprints)
+
+ def test_with_tarfile_object(self):
+ """
+ Fetch server descriptors via parse_file() for a tarfile object.
+ """
+
+ with tarfile.open(TARFILE_PATH) as tar_file:
+ descriptors = list(stem.descriptor.parse_file(tar_file))
+ self.assertEqual(3, len(descriptors))
+
+ fingerprints = set([desc.fingerprint for desc in descriptors])
+ self.assertEqual(TARFILE_FINGERPRINTS, fingerprints)
+
def test_metrics_descriptor(self):
"""
Parses and checks our results against a server descriptor from metrics.
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits