[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [metrics-utils/master] No longer assumes Apache time zone == server time zone. Also, the header is fixed to reflect proper licensing terms.



Author: kiyoto <kiyoto@xxxxxxxxxxxxxxxxxxxx>
Date: Sat, 23 Oct 2010 08:23:25 -0700
Subject: No longer assumes Apache time zone == server time zone. Also, the header is fixed to reflect proper licensing terms.
Commit: 68a7a5f99809c0c35f1bb73c7bc547b9282d2422

---
 visitor/visitor.py |   21 +++++++++++++--------
 1 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/visitor/visitor.py b/visitor/visitor.py
index c15134d..6950cfe 100644
--- a/visitor/visitor.py
+++ b/visitor/visitor.py
@@ -1,4 +1,5 @@
-# author: Kiyoto Tamura <owenestea@xxxxxxxxx>
+# Copyright 2010 The Tor Project
+# See LICENSE for licensing information
 # 
 # A Python port of Karsten Loesing's VisiTor.
 #
@@ -14,7 +15,7 @@ from cStringIO import StringIO
 
 # regexes used in the script
 IP_RE = re.compile(r'(\d+\.){3}\d+')
-APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) -\d{4}\]')
+APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) ([+-]\d{4})\]')
 TOR_USERAGENTS = [('torbutton1_2_0', re.compile(r'Mozilla/5\.0 \(Windows; U; Windows NT 5\.1; '
                                                 r'[a-z]{2}-[A-Z]{2}; rv\:1\.8\.1\.16\) '
                                                 r'Gecko/20080702 Firefox/2\.0\.0\.16')),
@@ -56,13 +57,17 @@ def get_exitlist(exitlist_filepath):
 
     return exitlist
 
-def apache_time2datetime(time_str):
+def apache_time2datetime(time_str, timediff_str):
     """
     Transforms the apache time to a Python datetime object.
     """
-    # We need to convert the time to UTC
-    yr, mo, d, h, m, s, _, _, _ = gmtime(mktime(strptime(time_str, '%d/%b/%Y:%H:%M:%S')))
-    return datetime(yr, mo, d, h, m, s)
+    # The Apache timezone offset has the form [+-]xx00, where `xx00`
+    # ranges from 0000 to 2300 (whole hours only; e.g. +0530 is not handled).
+    # Note the multiplication by 36 is 60 * 60 / 100 (xx00 -> seconds).
+    yr, mo, d, h, m, s, _, _, _ = strptime(time_str, '%d/%b/%Y:%H:%M:%S')
+    local_datetime = datetime(yr, mo, d, h, m, s)
+    timezone_diff = timedelta(0, int(timediff_str) * 36)
+    return local_datetime - timezone_diff
 
 def parse_apache_line(log_line):
     """
@@ -76,8 +81,8 @@ def parse_apache_line(log_line):
     apache_datetime = APACHE_DATETIME.search(log_line)
     if apache_datetime is None:
         raise ApacheParseError("Could not match the datetime for the line %s"%log_line)
-    apache_datetime = apache_time2datetime(apache_datetime.group(1))
-
+    apache_datetime = apache_time2datetime(apache_datetime.group(1), 
+                                           apache_datetime.group(2))
     user_agent = log_line.split('" ')[-1].rstrip('\n')
 
     return ip, user_agent, apache_datetime # maybe turn it into a dict if it gets confusing
-- 
1.7.1