[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [metrics-web/master] Use metrics-lib for parsing bridge descriptors, too.



commit 333a9c497b8558d584af7e27d953f50ad800aa11
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Wed Mar 21 16:24:23 2012 +0100

    Use metrics-lib for parsing bridge descriptors, too.
---
 .../ernie/cron/BridgeStatsFileHandler.java         |   27 ++-
 .../ernie/cron/ConsensusStatsFileHandler.java      |    8 +-
 .../ernie/cron/SanitizedBridgesReader.java         |  299 ++++++--------------
 3 files changed, 111 insertions(+), 223 deletions(-)

diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
index 6e7f4eb..aba7804 100644
--- a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
@@ -14,12 +14,15 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.SortedSet;
+import java.util.TimeZone;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.logging.Level;
@@ -95,6 +98,8 @@ public class BridgeStatsFileHandler {
   /* Database connection string. */
   private String connectionURL = null;
 
+  private SimpleDateFormat dateTimeFormat;
+
   /**
    * Initializes this class, including reading in intermediate results
    * files <code>stats/bridge-stats-raw</code> and
@@ -122,6 +127,9 @@ public class BridgeStatsFileHandler {
     /* Initialize database connection string. */
     this.connectionURL = connectionURL;
 
+    this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
     /* Initialize logger. */
     this.logger = Logger.getLogger(
         BridgeStatsFileHandler.class.getName());
@@ -175,7 +183,9 @@ public class BridgeStatsFileHandler {
                   obs.put(headers[i], parts[i]);
                 }
               }
-              this.addObs(hashedBridgeIdentity, date, time, obs);
+              long dateTimeMillis = dateTimeFormat.parse(date + " "
+                  + time).getTime();
+              this.addObs(hashedBridgeIdentity, dateTimeMillis, obs);
             }
           }
         }
@@ -185,6 +195,9 @@ public class BridgeStatsFileHandler {
       } catch (IOException e) {
         this.logger.log(Level.WARNING, "Failed to read file "
             + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+      } catch (ParseException e) {
+        this.logger.log(Level.WARNING, "Failed to read file "
+            + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
       }
     }
 
@@ -253,9 +266,10 @@ public class BridgeStatsFileHandler {
    * not included in the results, because stats are very likely broken.
    */
   public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity,
-      String date, String time) {
-    String value = hashedBridgeIdentity.toUpperCase() + "," + date + ","
-        + time;
+      long publishedMillis) {
+    String value = hashedBridgeIdentity.toUpperCase() + ","
+        + this.dateTimeFormat.format(publishedMillis).
+        replaceAll(" ", ",");
     if (!this.zeroTwoTwoDescriptors.contains(value)) {
       this.logger.finer("Adding new bridge 0.2.2.x extra-info "
           + "descriptor: " + value);
@@ -281,11 +295,14 @@ public class BridgeStatsFileHandler {
    * bridge and day, we keep the one with the later publication time and
    * discard the other one.
    */
-  public void addObs(String hashedIdentity, String date, String time,
+  public void addObs(String hashedIdentity, long publishedMillis,
       Map<String, String> obs) {
     for (String country : obs.keySet()) {
       this.countries.add(country);
     }
+    String dateTime = this.dateTimeFormat.format(publishedMillis);
+    String date = dateTime.split(" ")[0];
+    String time = dateTime.split(" ")[1];
     String shortKey = hashedIdentity + "," + date;
     String longKey = shortKey + "," + time;
     SortedMap<String, Map<String, String>> tailMap =
diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
index 6c83b05..fccb29b 100644
--- a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
@@ -66,6 +66,8 @@ public class ConsensusStatsFileHandler {
   /* Database connection string. */
   private String connectionURL = null;
 
+  private SimpleDateFormat dateTimeFormat;
+
  /**
   * Initializes this class, including reading in intermediate results
   * files <code>stats/consensus-stats-raw</code> and
@@ -86,6 +88,9 @@ public class ConsensusStatsFileHandler {
     /* Initialize database connection string. */
     this.connectionURL = connectionURL;
 
+    this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
     /* Initialize logger. */
     this.logger = Logger.getLogger(
         ConsensusStatsFileHandler.class.getName());
@@ -128,7 +133,8 @@ public class ConsensusStatsFileHandler {
    * Adds the intermediate results of the number of running bridges in a
    * given bridge status to the existing observations.
    */
-  public void addBridgeConsensusResults(String published, int running) {
+  public void addBridgeConsensusResults(long publishedMillis, int running) {
+    String published = dateTimeFormat.format(publishedMillis);
     String line = published + "," + running;
     if (!this.bridgesRaw.containsKey(published)) {
       this.logger.finer("Adding new bridge numbers: " + line);
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
index 78bd7db..c7199cd 100644
--- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -2,30 +2,20 @@
  * See LICENSE for licensing information */
 package org.torproject.ernie.cron;
 
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.List;
+import java.util.Iterator;
 import java.util.Map;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.ServerDescriptor;
 
 public class SanitizedBridgesReader {
   private ConsensusStatsFileHandler csfh;
@@ -45,217 +35,92 @@ public class SanitizedBridgesReader {
     this.logger =
         Logger.getLogger(SanitizedBridgesReader.class.getName());
 
-    SortedSet<String> bridgesImportHistory = new TreeSet<String>();
-    File bridgesImportHistoryFile =
-        new File(statsDirectory, "bridges-import-history");
-    if (keepImportHistory && bridgesImportHistoryFile.exists()) {
-      try {
-        BufferedReader br = new BufferedReader(new FileReader(
-            bridgesImportHistoryFile));
-        String line = null;
-        while ((line = br.readLine()) != null) {
-          bridgesImportHistory.add(line);
-        }
-        br.close();
-      } catch (IOException e) {
-        logger.log(Level.WARNING, "Could not read in bridge descriptor "
-            + "import history file. Skipping.");
-      }
-    }
     if (bridgesDir.exists()) {
       logger.fine("Importing files in directory " + bridgesDir + "/...");
-      Stack<File> filesInInputDir = new Stack<File>();
-      filesInInputDir.add(bridgesDir);
-      List<File> problems = new ArrayList<File>();
-      while (!filesInInputDir.isEmpty()) {
-        File pop = filesInInputDir.pop();
-        if (pop.isDirectory()) {
-          for (File f : pop.listFiles()) {
-            filesInInputDir.add(f);
-          }
-          continue;
-        } else if (keepImportHistory && bridgesImportHistory.contains(
-            pop.getName())) {
-          continue;
-        } else {
-          try {
-            BufferedInputStream bis = new BufferedInputStream(
-                new FileInputStream(pop));
-            ByteArrayOutputStream baos = new ByteArrayOutputStream();
-            int len;
-            byte[] data = new byte[1024];
-            while ((len = bis.read(data, 0, 1024)) >= 0) {
-              baos.write(data, 0, len);
-            }
-            bis.close();
-            byte[] allData = baos.toByteArray();
-            String fn = pop.getName();
-            // TODO dateTime extraction doesn't work for sanitized network
-            // statuses!
-            String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
-                + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
-                + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
-            this.parse(allData, dateTime, true);
-            if (keepImportHistory) {
-              bridgesImportHistory.add(pop.getName());
-            }
-          } catch (IOException e) {
-            problems.add(pop);
-            if (problems.size() > 3) {
-              break;
-            }
-          }
-        }
+      DescriptorReader reader =
+          DescriptorSourceFactory.createDescriptorReader();
+      reader.addDirectory(bridgesDir);
+      if (keepImportHistory) {
+        reader.setExcludeFiles(new File(statsDirectory,
+            "bridge-descriptor-history"));
       }
-      if (problems.isEmpty()) {
-        logger.fine("Finished importing files in directory " + bridgesDir
-            + "/.");
-      } else {
-        StringBuilder sb = new StringBuilder("Failed importing files in "
-            + "directory " + bridgesDir + "/:");
-        int printed = 0;
-        for (File f : problems) {
-          sb.append("\n  " + f.getAbsolutePath());
-          if (++printed >= 3) {
-            sb.append("\n  ... more");
-            break;
+      Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+      while (descriptorFiles.hasNext()) {
+        DescriptorFile descriptorFile = descriptorFiles.next();
+        if (descriptorFile.getDescriptors() != null) {
+          for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+            if (descriptor instanceof BridgeNetworkStatus) {
+              this.addBridgeNetworkStatus(
+                  (BridgeNetworkStatus) descriptor);
+            } else if (descriptor instanceof ServerDescriptor) {
+              this.addServerDescriptor((ServerDescriptor) descriptor);
+            } else if (descriptor instanceof ExtraInfoDescriptor) {
+              this.addExtraInfoDescriptor(
+                  (ExtraInfoDescriptor) descriptor);
+            }
           }
         }
-        logger.warning(sb.toString());
       }
-      if (keepImportHistory) {
-        try {
-          bridgesImportHistoryFile.getParentFile().mkdirs();
-          BufferedWriter bw = new BufferedWriter(new FileWriter(
-              bridgesImportHistoryFile));
-          for (String line : bridgesImportHistory) {
-            bw.write(line + "\n");
-          }
-          bw.close();
-        } catch (IOException e) {
-          logger.log(Level.WARNING, "Could not write bridge descriptor "
-              + "import history file.");
-        }
+      logger.info("Finished importing bridge descriptors.");
+    }
+  }
+
+  private void addBridgeNetworkStatus(BridgeNetworkStatus status) {
+    int runningBridges = 0;
+    for (NetworkStatusEntry statusEntry :
+        status.getStatusEntries().values()) {
+      if (statusEntry.getFlags().contains("Running")) {
+        runningBridges++;
       }
     }
+    this.csfh.addBridgeConsensusResults(status.getPublishedMillis(),
+        runningBridges);
   }
 
-  private void parse(byte[] allData, String dateTime, boolean sanitized) {
-    try {
-      BufferedReader br = new BufferedReader(new StringReader(
-          new String(allData, "US-ASCII")));
-      SimpleDateFormat timeFormat = new SimpleDateFormat(
-          "yyyy-MM-dd HH:mm:ss");
-      timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
-      String hashedIdentity = null, platformLine = null,
-          publishedLine = null, geoipStartTimeLine = null,
-          bridgeStatsEndLine = null;
-      boolean skip = false;
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        if (line.startsWith("r ")) {
-          int runningBridges = 0;
-          while ((line = br.readLine()) != null) {
-            if (line.startsWith("s ") && line.contains(" Running")) {
-              runningBridges++;
-            }
-          }
-          if (this.csfh != null) {
-            this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
-          }
-        } else if (line.startsWith("router ")) {
-        } else if (line.startsWith("extra-info ")) {
-          hashedIdentity = sanitized ? line.split(" ")[2]
-              : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
-          if (this.bsfh != null) {
-            skip = this.bsfh.isKnownRelay(hashedIdentity);
-          }
-        } else if (!skip && line.startsWith("platform ")) {
-          platformLine = line;
-        } else if (!skip && line.startsWith("published ")) {
-          publishedLine = line;
-        } else if (line.startsWith("opt fingerprint") ||
-            line.startsWith("fingerprint")) {
-          String identity = line.substring(line.startsWith("opt ") ?
-              "opt fingerprint".length() : "fingerprint".length()).
-              replaceAll(" ", "").toLowerCase();
-          hashedIdentity = sanitized ? identity
-              : DigestUtils.shaHex(identity).toUpperCase();
-        } else if (!skip && line.startsWith("geoip-start-time ")) {
-          geoipStartTimeLine = line;
-        } else if (!skip && line.startsWith("geoip-client-origins")
-            && line.split(" ").length > 1) {
-          if (publishedLine == null ||
-              geoipStartTimeLine == null) {
-            this.logger.warning("Either published line or "
-                + "geoip-start-time line is not present in "
-                + (sanitized ? "sanitized" : "non-sanitized")
-                + " bridge descriptors from " + dateTime + ".");
-            break;
-          }
-          long published = timeFormat.parse(publishedLine.
-              substring("published ".length())).getTime();
-          long started = timeFormat.parse(geoipStartTimeLine.
-              substring("geoip-start-time ".length())).getTime();
-          long seconds = (published - started) / 1000L;
-          double allUsers = 0.0D;
-          Map<String, String> obs = new HashMap<String, String>();
-          String[] parts = line.split(" ")[1].split(",");
-          for (String p : parts) {
-            String country = p.substring(0, 2);
-            double users = ((double) Long.parseLong(p.substring(3)) - 4L)
-                    * 86400.0D / ((double) seconds);
-            allUsers += users;
-            obs.put(country, String.format("%.2f", users));
-          }
-          obs.put("zy", String.format("%.2f", allUsers));
-          String date = publishedLine.split(" ")[1];
-          String time = publishedLine.split(" ")[2];
-          if (this.bsfh != null) {
-            this.bsfh.addObs(hashedIdentity, date, time, obs);
-          }
-        } else if (!skip && line.startsWith("bridge-stats-end ")) {
-          bridgeStatsEndLine = line;
-        } else if (!skip && line.startsWith("bridge-ips")
-            && line.split(" ").length > 1) {
-          if (bridgeStatsEndLine == null) {
-            this.logger.warning("bridge-ips line without preceding "
-                + "bridge-stats-end line in "
-                + (sanitized ? "sanitized" : "non-sanitized")
-                + " bridge descriptor.");
-            break;
-          }
-          double allUsers = 0.0D;
-          Map<String, String> obs = new HashMap<String, String>();
-          String[] parts = line.split(" ")[1].split(",");
-          for (String p : parts) {
-            String country = p.substring(0, 2);
-            double users = (double) Long.parseLong(p.substring(3)) - 4L;
-            allUsers += users;
-            obs.put(country, String.format("%.2f", users));
-          }
-          obs.put("zy", String.format("%.2f", allUsers));
-          String date = bridgeStatsEndLine.split(" ")[1];
-          String time = bridgeStatsEndLine.split(" ")[2];
-          if (this.bsfh != null) {
-            this.bsfh.addObs(hashedIdentity, date, time, obs);
-          }
+  private void addServerDescriptor(ServerDescriptor descriptor) {
+    if (descriptor.getPlatform() != null &&
+        descriptor.getPlatform().startsWith("Tor 0.2.2")) {
+      this.bsfh.addZeroTwoTwoDescriptor(descriptor.getFingerprint(),
+          descriptor.getPublishedMillis());
+    }
+  }
+
+  private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
+    if (!this.bsfh.isKnownRelay(descriptor.getFingerprint())) {
+      if (descriptor.getGeoipStartTimeMillis() >= 0 &&
+          descriptor.getGeoipClientOrigins() != null) {
+        long seconds = (descriptor.getPublishedMillis()
+            - descriptor.getGeoipStartTimeMillis()) / 1000L;
+        double allUsers = 0.0D;
+        Map<String, String> obs = new HashMap<String, String>();
+        for (Map.Entry<String, Integer> e :
+            descriptor.getGeoipClientOrigins().entrySet()) {
+          String country = e.getKey();
+          double users = ((double) e.getValue() - 4) * 86400.0D
+              / ((double) seconds);
+          allUsers += users;
+          obs.put(country, String.format("%.2f", users));
         }
+        obs.put("zy", String.format("%.2f", allUsers));
+        this.bsfh.addObs(descriptor.getFingerprint(),
+            descriptor.getPublishedMillis(), obs);
       }
-      if (this.bsfh != null && platformLine != null &&
-          platformLine.startsWith("platform Tor 0.2.2")) {
-        String date = publishedLine.split(" ")[1];
-        String time = publishedLine.split(" ")[2];
-        this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+      if (descriptor.getBridgeStatsEndMillis() >= 0 &&
+          descriptor.getBridgeIps() != null) {
+        double allUsers = 0.0D;
+        Map<String, String> obs = new HashMap<String, String>();
+        for (Map.Entry<String, Integer> e :
+            descriptor.getBridgeIps().entrySet()) {
+          String country = e.getKey();
+          double users = (double) e.getValue() - 4;
+          allUsers += users;
+          obs.put(country, String.format("%.2f", users));
+        }
+        obs.put("zy", String.format("%.2f", allUsers));
+        this.bsfh.addObs(descriptor.getFingerprint(),
+            descriptor.getBridgeStatsEndMillis(), obs);
       }
-    } catch (IOException e) {
-      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
-          e);
-      return;
-    } catch (ParseException e) {
-      this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
-          e);
-      return;
+
     }
   }
 }



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits