[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [metrics-utils/master 3/4] Support reading decompressed web server log from stdin.



Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Fri, 24 Sep 2010 13:28:32 +0200
Subject: Support reading decompressed web server log from stdin.
Commit: 51c9df86f55bbcebe0bbda79a6ef4269b2a7691a

---
 visitor/ChangeLog    |    2 ++
 visitor/HOWTO        |    8 ++++++--
 visitor/VisiTor.java |   49 +++++++++++++++++++++++++++++++------------------
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/visitor/ChangeLog b/visitor/ChangeLog
index 01c2294..e187b30 100644
--- a/visitor/ChangeLog
+++ b/visitor/ChangeLog
@@ -2,6 +2,8 @@ VisiTor change log:
 
 Changes in version 0.0.3 - 2010-09-2?
   - Support parsing .gz-compressed web server logs. Suggested by murb.
+  - Support reading decompressed web server log from stdin. Suggested by
+    murb.
 
 Changes in version 0.0.2 - 2010-09-22
   - Don't break if we're given zero exit lists.
diff --git a/visitor/HOWTO b/visitor/HOWTO
index 57d7a8f..2e1267b 100644
--- a/visitor/HOWTO
+++ b/visitor/HOWTO
@@ -58,8 +58,9 @@ for Linux and Mac OS X; commands for Windows may vary):
   $ javac VisiTor.java
 
 - Run the Java application, providing it with the parameters it needs.
-  Note that the fourth parameter that writes out the server log part with
-  Tor user requests is optional:
+  Passing '-' (without quotes) as the web server log file name makes
+  VisiTor read the log from stdin. Note that the fourth parameter, which
+  writes out the server log part with Tor user requests, is optional:
 
   java VisiTor <web server log> <exit list directory> <output file>
        [<server log part with Tor user requests>]
@@ -70,6 +71,9 @@ for Linux and Mac OS X; commands for Windows may vary):
 
   $ java VisiTor access_log.gz exitlists/ out.csv tor_access_log
 
+  $ gunzip -c access_log.gz | java VisiTor - exitlists/ out.csv \
+        tor_access_log
+
 - Find the results in /home/you/visitor/out.csv in a format that can be
   imported by any spreadsheet application like OpenOffice.org Calc or
   processed by R.
diff --git a/visitor/VisiTor.java b/visitor/VisiTor.java
index 624fd3a..2a9fb1e 100644
--- a/visitor/VisiTor.java
+++ b/visitor/VisiTor.java
@@ -48,32 +48,45 @@ public final class VisiTor {
 
     /* Read the first line of the web server log to let the user know
      * early if we think we can't parse it. */
-    System.out.print("Reading the first line of your web server log '"
-        + webServerLog + "' to see if we can parse it... ");
+    System.out.print("Reading the first line of your web server log "
+        + (webServerLog.equals("-") ? "from stdin" : "'" + webServerLog
+        + "'") + " to see if we can parse it... ");
     SimpleDateFormat logFormat = new SimpleDateFormat(
         "[dd/MMM/yyyy:HH:mm:ss Z]");
     logFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     Pattern ipAddressPattern = Pattern.compile("(\\d+\\.){3}\\d+");
     BufferedReader webServerLogReader = null;
     String logLine = null;
-    File logFile = new File(webServerLog);
-    if (!logFile.exists()) {
-      System.out.println("FAILED\nFile does not exist! Exiting!");
-      return;
-    }
-    try {
-      if (webServerLog.endsWith(".gz")) {
+    if (webServerLog.equals("-")) {
+      try {
         webServerLogReader = new BufferedReader(new InputStreamReader(
-            new GZIPInputStream(new FileInputStream(webServerLog))));
-      } else {
-        webServerLogReader = new BufferedReader(new FileReader(
-            webServerLog));
+            System.in));
+        logLine = webServerLogReader.readLine();
+      } catch (IOException e) {
+        System.out.println("FAILED\nCould not read from stdin! Exiting!");
+        e.printStackTrace();
+        return;
+      }
+    } else {
+      File logFile = new File(webServerLog);
+      if (!logFile.exists()) {
+        System.out.println("FAILED\nFile does not exist! Exiting!");
+        return;
+      }
+      try {
+        if (webServerLog.endsWith(".gz")) {
+          webServerLogReader = new BufferedReader(new InputStreamReader(
+              new GZIPInputStream(new FileInputStream(webServerLog))));
+        } else {
+          webServerLogReader = new BufferedReader(new FileReader(
+              webServerLog));
+        }
+        logLine = webServerLogReader.readLine();
+      } catch (IOException e) {
+        System.out.println("FAILED\nCould not read file! Exiting!");
+        e.printStackTrace();
+        return;
       }
-      logLine = webServerLogReader.readLine();
-    } catch (IOException e) {
-      System.out.println("FAILED\nCould not read file! Exiting!");
-      e.printStackTrace();
-      return;
     }
     if (logLine == null) {
       System.out.println("FAILED\nLog file is empty! Exiting!");
-- 
1.7.1