[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [torbel/master] Add metadata to CSV export format.
commit 8323b633dbccba0b3e06362cc8961a6f303557d8
Author: Harry Bock <hbock@xxxxxxxxxxx>
Date: Wed Oct 20 00:43:42 2010 -0400
Add metadata to CSV export format.
Add metadata to CSV format specification and implementation
(query and export). This allows torbel to know what version
of the CSV format to use, allowing for changes in the future
and backward compatibility.
Not implemented yet for the JSON format.
---
__init__.py | 2 ++
controller.py | 4 ++--
doc/data-spec.txt | 13 +++++++++++++
query.py | 24 ++++++++++++++++++++++++
4 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/__init__.py b/__init__.py
index e69de29..b5bf7e9 100644
--- a/__init__.py
+++ b/__init__.py
@@ -0,0 +1,2 @@
+# TorBEL export version format.
+__export_version__ = 1
diff --git a/controller.py b/controller.py
index 29beaff..807ce18 100644
--- a/controller.py
+++ b/controller.py
@@ -24,7 +24,7 @@ from twisted.internet import reactor
from TorCtl import TorCtl, TorUtil
# torbel submodules
-from torbel import scheduler, network, utils
+from torbel import scheduler, network, utils, __export_version__
from torbel.logger import *
from torbel.router import RouterRecord
@@ -598,7 +598,7 @@ class Controller(TorCtl.EventHandler):
csv_file = open(fn_new, "w")
out = csv.writer(csv_file, dialect = csv.excel)
-
+ out.writerow(["torbel", __export_version__])
# FIXME: Is it safe to just take the itervalues list?
with self.consensus_cache_lock:
for router in self.router_cache.itervalues():
diff --git a/doc/data-spec.txt b/doc/data-spec.txt
index c5f4014..9c39d87 100644
--- a/doc/data-spec.txt
+++ b/doc/data-spec.txt
@@ -12,6 +12,8 @@ Status: Draft
This document is a work-in-progress and the data format may change quickly
during the summer in response to demand and implementation problems.
+ This document describes TorBEL export data format version 1.
+
1. Exported Data
1.1. Records
@@ -152,6 +154,17 @@ Status: Draft
Fields that contain an escaped double quote are also enclosed in a set
of double quotes.
+ Metadata indicating the CSV export format version must be present on the first
+ line in the format:
+
+ torbel,VERSION
+
+ Where VERSION is the export format version as an integer, and torbel is
+ the literal string 'torbel'. Additional metadata fields may be added
+ in future export versions; consumers MUST accept and ignore any extra
+ fields on this line that are not specified in this document.
+
+ Following the metadata line, all remaining lines are router data rows.
The fields of each row are, in order:
ExitAddress, RouterID, Nickname, LastTestedTimestamp, InConsensus,
diff --git a/query.py b/query.py
index 69e9ec9..16a6e7a 100644
--- a/query.py
+++ b/query.py
@@ -10,6 +10,7 @@ import ipaddr
import sys
from socket import inet_aton, inet_ntoa
from logger import *
+from torbel import __export_version__
if sys.version_info >= (2,6):
import json
@@ -189,10 +190,14 @@ class ExitPolicyRule:
return "reject " + ip + ":" + port
class ExitList:
+ class ImportError(ValueError):
+ pass
+
def __init__(self, filename, status_filename = None):
self.cache_ip = {}
self.cache_id = {}
+ self.version = None
self.next_update = None
self.last_update = None
self.export_files = []
@@ -286,6 +291,25 @@ class ExitList:
the TorBEL data-spec document. """
reader = csv.reader(infile, dialect = "excel")
record = 1
+ # Grab metadata row and export format version.
+ metadata = reader.next()
+ try:
+ self.version = int(metadata[1])
+ if metadata[0] != "torbel":
+ raise self.ImportError("Invalid TorBEL export format.")
+ if self.version > __export_version__:
+ raise self.ImportError("Export version %d not supported!" % self.version)
+
+ # ValueError will be raised if the version field (the second value on
+ # the metadata line) is not an integer.
+ # IndexError is raised if the metadata line has fewer than two fields,
+ # e.g. a blank first line (csv yields an empty row) or a missing version.
+ # StopIteration is raised if we try to read from an empty file.
+ # All of these indicate the TorBEL export file is not actually
+ # a valid export.
+ except (ValueError, IndexError, StopIteration):
+ raise self.ImportError("Invalid TorBEL export format.")
+
for r in reader:
try:
data = {
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits