[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [metrics-utils/master] No longer assumes Apache time zone == server time zone. Also, the header is fixed to reflect proper licensing terms.
Author: kiyoto <kiyoto@xxxxxxxxxxxxxxxxxxxx>
Date: Sat, 23 Oct 2010 08:23:25 -0700
Subject: No longer assumes Apache time zone == server time zone. Also, the header is fixed to reflect proper licensing terms.
Commit: 68a7a5f99809c0c35f1bb73c7bc547b9282d2422
---
visitor/visitor.py | 21 +++++++++++++--------
1 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/visitor/visitor.py b/visitor/visitor.py
index c15134d..6950cfe 100644
--- a/visitor/visitor.py
+++ b/visitor/visitor.py
@@ -1,4 +1,5 @@
-# author: Kiyoto Tamura <owenestea@xxxxxxxxx>
+# Copyright 2010 The Tor Project
+# See LICENSE for licensing information
#
# A Python port of Karsten Loesing's VisiTor.
#
@@ -14,7 +15,7 @@ from cStringIO import StringIO
# regexes used in the script
IP_RE = re.compile(r'(\d+\.){3}\d+')
-APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) -\d{4}\]')
+APACHE_DATETIME = re.compile(r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}) ([+-]\d{4})\]')
TOR_USERAGENTS = [('torbutton1_2_0', re.compile(r'Mozilla/5\.0 \(Windows; U; Windows NT 5\.1; '
r'[a-z]{2}-[A-Z]{2}; rv\:1\.8\.1\.16\) '
r'Gecko/20080702 Firefox/2\.0\.0\.16')),
@@ -56,13 +57,17 @@ def get_exitlist(exitlist_filepath):
return exitlist
-def apache_time2datetime(time_str):
+def apache_time2datetime(time_str, timediff_str):
"""
Transforms the apache time to a Python datetime object.
"""
- # We need to convert the time to UTC
- yr, mo, d, h, m, s, _, _, _ = gmtime(mktime(strptime(time_str, '%d/%b/%Y:%H:%M:%S')))
- return datetime(yr, mo, d, h, m, s)
+ # the apache timezone diff format is like [+-]xx00 where
+ # `xx00` ranges from 0000 to 2300 (whole-hour offsets only)
+ # Note the multiplication by 36 is 60 * 60 / 100
+ yr, mo, d, h, m, s, _, _, _ = strptime(time_str, '%d/%b/%Y:%H:%M:%S')
+ local_datetime = datetime(yr, mo, d, h, m, s)
+ timezone_diff = timedelta(0, int(timediff_str) * 36)
+ return local_datetime - timezone_diff
def parse_apache_line(log_line):
"""
@@ -76,8 +81,8 @@ def parse_apache_line(log_line):
apache_datetime = APACHE_DATETIME.search(log_line)
if apache_datetime is None:
raise ApacheParseError("Could not match the datetime for the line %s"%log_line)
- apache_datetime = apache_time2datetime(apache_datetime.group(1))
-
+ apache_datetime = apache_time2datetime(apache_datetime.group(1),
+ apache_datetime.group(2))
user_agent = log_line.split('" ')[-1].rstrip('\n')
return ip, user_agent, apache_datetime # maybe turn it into a dict if it gets confusing
--
1.7.1