[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-web/master] Use metrics-lib for parsing bridge descriptors, too.
commit 333a9c497b8558d584af7e27d953f50ad800aa11
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Mar 21 16:24:23 2012 +0100
Use metrics-lib for parsing bridge descriptors, too.
---
.../ernie/cron/BridgeStatsFileHandler.java | 27 ++-
.../ernie/cron/ConsensusStatsFileHandler.java | 8 +-
.../ernie/cron/SanitizedBridgesReader.java | 299 ++++++--------------
3 files changed, 111 insertions(+), 223 deletions(-)
diff --git a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
index 6e7f4eb..aba7804 100644
--- a/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/BridgeStatsFileHandler.java
@@ -14,12 +14,15 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
+import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
@@ -95,6 +98,8 @@ public class BridgeStatsFileHandler {
/* Database connection string. */
private String connectionURL = null;
+ private SimpleDateFormat dateTimeFormat;
+
/**
* Initializes this class, including reading in intermediate results
* files <code>stats/bridge-stats-raw</code> and
@@ -122,6 +127,9 @@ public class BridgeStatsFileHandler {
/* Initialize database connection string. */
this.connectionURL = connectionURL;
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
/* Initialize logger. */
this.logger = Logger.getLogger(
BridgeStatsFileHandler.class.getName());
@@ -175,7 +183,9 @@ public class BridgeStatsFileHandler {
obs.put(headers[i], parts[i]);
}
}
- this.addObs(hashedBridgeIdentity, date, time, obs);
+ long dateTimeMillis = dateTimeFormat.parse(date + " "
+ + time).getTime();
+ this.addObs(hashedBridgeIdentity, dateTimeMillis, obs);
}
}
}
@@ -185,6 +195,9 @@ public class BridgeStatsFileHandler {
} catch (IOException e) {
this.logger.log(Level.WARNING, "Failed to read file "
+ this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
+ } catch (ParseException e) {
+ this.logger.log(Level.WARNING, "Failed to read file "
+ + this.bridgeStatsRawFile.getAbsolutePath() + "!", e);
}
}
@@ -253,9 +266,10 @@ public class BridgeStatsFileHandler {
* not included in the results, because stats are very likely broken.
*/
public void addZeroTwoTwoDescriptor(String hashedBridgeIdentity,
- String date, String time) {
- String value = hashedBridgeIdentity.toUpperCase() + "," + date + ","
- + time;
+ long publishedMillis) {
+ String value = hashedBridgeIdentity.toUpperCase() + ","
+ + this.dateTimeFormat.format(publishedMillis).
+ replaceAll(" ", ",");
if (!this.zeroTwoTwoDescriptors.contains(value)) {
this.logger.finer("Adding new bridge 0.2.2.x extra-info "
+ "descriptor: " + value);
@@ -281,11 +295,14 @@ public class BridgeStatsFileHandler {
* bridge and day, we keep the one with the later publication time and
* discard the other one.
*/
- public void addObs(String hashedIdentity, String date, String time,
+ public void addObs(String hashedIdentity, long publishedMillis,
Map<String, String> obs) {
for (String country : obs.keySet()) {
this.countries.add(country);
}
+ String dateTime = this.dateTimeFormat.format(publishedMillis);
+ String date = dateTime.split(" ")[0];
+ String time = dateTime.split(" ")[1];
String shortKey = hashedIdentity + "," + date;
String longKey = shortKey + "," + time;
SortedMap<String, Map<String, String>> tailMap =
diff --git a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
index 6c83b05..fccb29b 100644
--- a/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
+++ b/src/org/torproject/ernie/cron/ConsensusStatsFileHandler.java
@@ -66,6 +66,8 @@ public class ConsensusStatsFileHandler {
/* Database connection string. */
private String connectionURL = null;
+ private SimpleDateFormat dateTimeFormat;
+
/**
* Initializes this class, including reading in intermediate results
* files <code>stats/consensus-stats-raw</code> and
@@ -86,6 +88,9 @@ public class ConsensusStatsFileHandler {
/* Initialize database connection string. */
this.connectionURL = connectionURL;
+ this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
/* Initialize logger. */
this.logger = Logger.getLogger(
ConsensusStatsFileHandler.class.getName());
@@ -128,7 +133,8 @@ public class ConsensusStatsFileHandler {
* Adds the intermediate results of the number of running bridges in a
* given bridge status to the existing observations.
*/
- public void addBridgeConsensusResults(String published, int running) {
+ public void addBridgeConsensusResults(long publishedMillis, int running) {
+ String published = dateTimeFormat.format(publishedMillis);
String line = published + "," + running;
if (!this.bridgesRaw.containsKey(published)) {
this.logger.finer("Adding new bridge numbers: " + line);
diff --git a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
index 78bd7db..c7199cd 100644
--- a/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
+++ b/src/org/torproject/ernie/cron/SanitizedBridgesReader.java
@@ -2,30 +2,20 @@
* See LICENSE for licensing information */
package org.torproject.ernie.cron;
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.StringReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.List;
+import java.util.Iterator;
import java.util.Map;
-import java.util.SortedSet;
-import java.util.Stack;
-import java.util.TimeZone;
-import java.util.TreeSet;
-import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.commons.codec.digest.DigestUtils;
+import org.torproject.descriptor.BridgeNetworkStatus;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.ServerDescriptor;
public class SanitizedBridgesReader {
private ConsensusStatsFileHandler csfh;
@@ -45,217 +35,92 @@ public class SanitizedBridgesReader {
this.logger =
Logger.getLogger(SanitizedBridgesReader.class.getName());
- SortedSet<String> bridgesImportHistory = new TreeSet<String>();
- File bridgesImportHistoryFile =
- new File(statsDirectory, "bridges-import-history");
- if (keepImportHistory && bridgesImportHistoryFile.exists()) {
- try {
- BufferedReader br = new BufferedReader(new FileReader(
- bridgesImportHistoryFile));
- String line = null;
- while ((line = br.readLine()) != null) {
- bridgesImportHistory.add(line);
- }
- br.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not read in bridge descriptor "
- + "import history file. Skipping.");
- }
- }
if (bridgesDir.exists()) {
logger.fine("Importing files in directory " + bridgesDir + "/...");
- Stack<File> filesInInputDir = new Stack<File>();
- filesInInputDir.add(bridgesDir);
- List<File> problems = new ArrayList<File>();
- while (!filesInInputDir.isEmpty()) {
- File pop = filesInInputDir.pop();
- if (pop.isDirectory()) {
- for (File f : pop.listFiles()) {
- filesInInputDir.add(f);
- }
- continue;
- } else if (keepImportHistory && bridgesImportHistory.contains(
- pop.getName())) {
- continue;
- } else {
- try {
- BufferedInputStream bis = new BufferedInputStream(
- new FileInputStream(pop));
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- String fn = pop.getName();
- // TODO dateTime extraction doesn't work for sanitized network
- // statuses!
- String dateTime = fn.substring(0, 4) + "-" + fn.substring(4, 6)
- + "-" + fn.substring(6, 8) + " " + fn.substring(9, 11)
- + ":" + fn.substring(11, 13) + ":" + fn.substring(13, 15);
- this.parse(allData, dateTime, true);
- if (keepImportHistory) {
- bridgesImportHistory.add(pop.getName());
- }
- } catch (IOException e) {
- problems.add(pop);
- if (problems.size() > 3) {
- break;
- }
- }
- }
+ DescriptorReader reader =
+ DescriptorSourceFactory.createDescriptorReader();
+ reader.addDirectory(bridgesDir);
+ if (keepImportHistory) {
+ reader.setExcludeFiles(new File(statsDirectory,
+ "bridge-descriptor-history"));
}
- if (problems.isEmpty()) {
- logger.fine("Finished importing files in directory " + bridgesDir
- + "/.");
- } else {
- StringBuilder sb = new StringBuilder("Failed importing files in "
- + "directory " + bridgesDir + "/:");
- int printed = 0;
- for (File f : problems) {
- sb.append("\n " + f.getAbsolutePath());
- if (++printed >= 3) {
- sb.append("\n ... more");
- break;
+ Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ if (descriptorFile.getDescriptors() != null) {
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (descriptor instanceof BridgeNetworkStatus) {
+ this.addBridgeNetworkStatus(
+ (BridgeNetworkStatus) descriptor);
+ } else if (descriptor instanceof ServerDescriptor) {
+ this.addServerDescriptor((ServerDescriptor) descriptor);
+ } else if (descriptor instanceof ExtraInfoDescriptor) {
+ this.addExtraInfoDescriptor(
+ (ExtraInfoDescriptor) descriptor);
+ }
}
}
- logger.warning(sb.toString());
}
- if (keepImportHistory) {
- try {
- bridgesImportHistoryFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- bridgesImportHistoryFile));
- for (String line : bridgesImportHistory) {
- bw.write(line + "\n");
- }
- bw.close();
- } catch (IOException e) {
- logger.log(Level.WARNING, "Could not write bridge descriptor "
- + "import history file.");
- }
+ logger.info("Finished importing bridge descriptors.");
+ }
+ }
+
+ private void addBridgeNetworkStatus(BridgeNetworkStatus status) {
+ int runningBridges = 0;
+ for (NetworkStatusEntry statusEntry :
+ status.getStatusEntries().values()) {
+ if (statusEntry.getFlags().contains("Running")) {
+ runningBridges++;
}
}
+ this.csfh.addBridgeConsensusResults(status.getPublishedMillis(),
+ runningBridges);
}
- private void parse(byte[] allData, String dateTime, boolean sanitized) {
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(allData, "US-ASCII")));
- SimpleDateFormat timeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- String hashedIdentity = null, platformLine = null,
- publishedLine = null, geoipStartTimeLine = null,
- bridgeStatsEndLine = null;
- boolean skip = false;
- String line = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("r ")) {
- int runningBridges = 0;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("s ") && line.contains(" Running")) {
- runningBridges++;
- }
- }
- if (this.csfh != null) {
- this.csfh.addBridgeConsensusResults(dateTime, runningBridges);
- }
- } else if (line.startsWith("router ")) {
- } else if (line.startsWith("extra-info ")) {
- hashedIdentity = sanitized ? line.split(" ")[2]
- : DigestUtils.shaHex(line.split(" ")[2]).toUpperCase();
- if (this.bsfh != null) {
- skip = this.bsfh.isKnownRelay(hashedIdentity);
- }
- } else if (!skip && line.startsWith("platform ")) {
- platformLine = line;
- } else if (!skip && line.startsWith("published ")) {
- publishedLine = line;
- } else if (line.startsWith("opt fingerprint") ||
- line.startsWith("fingerprint")) {
- String identity = line.substring(line.startsWith("opt ") ?
- "opt fingerprint".length() : "fingerprint".length()).
- replaceAll(" ", "").toLowerCase();
- hashedIdentity = sanitized ? identity
- : DigestUtils.shaHex(identity).toUpperCase();
- } else if (!skip && line.startsWith("geoip-start-time ")) {
- geoipStartTimeLine = line;
- } else if (!skip && line.startsWith("geoip-client-origins")
- && line.split(" ").length > 1) {
- if (publishedLine == null ||
- geoipStartTimeLine == null) {
- this.logger.warning("Either published line or "
- + "geoip-start-time line is not present in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptors from " + dateTime + ".");
- break;
- }
- long published = timeFormat.parse(publishedLine.
- substring("published ".length())).getTime();
- long started = timeFormat.parse(geoipStartTimeLine.
- substring("geoip-start-time ".length())).getTime();
- long seconds = (published - started) / 1000L;
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = ((double) Long.parseLong(p.substring(3)) - 4L)
- * 86400.0D / ((double) seconds);
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
- } else if (!skip && line.startsWith("bridge-stats-end ")) {
- bridgeStatsEndLine = line;
- } else if (!skip && line.startsWith("bridge-ips")
- && line.split(" ").length > 1) {
- if (bridgeStatsEndLine == null) {
- this.logger.warning("bridge-ips line without preceding "
- + "bridge-stats-end line in "
- + (sanitized ? "sanitized" : "non-sanitized")
- + " bridge descriptor.");
- break;
- }
- double allUsers = 0.0D;
- Map<String, String> obs = new HashMap<String, String>();
- String[] parts = line.split(" ")[1].split(",");
- for (String p : parts) {
- String country = p.substring(0, 2);
- double users = (double) Long.parseLong(p.substring(3)) - 4L;
- allUsers += users;
- obs.put(country, String.format("%.2f", users));
- }
- obs.put("zy", String.format("%.2f", allUsers));
- String date = bridgeStatsEndLine.split(" ")[1];
- String time = bridgeStatsEndLine.split(" ")[2];
- if (this.bsfh != null) {
- this.bsfh.addObs(hashedIdentity, date, time, obs);
- }
+ private void addServerDescriptor(ServerDescriptor descriptor) {
+ if (descriptor.getPlatform() != null &&
+ descriptor.getPlatform().startsWith("Tor 0.2.2")) {
+ this.bsfh.addZeroTwoTwoDescriptor(descriptor.getFingerprint(),
+ descriptor.getPublishedMillis());
+ }
+ }
+
+ private void addExtraInfoDescriptor(ExtraInfoDescriptor descriptor) {
+ if (!this.bsfh.isKnownRelay(descriptor.getFingerprint())) {
+ if (descriptor.getGeoipStartTimeMillis() >= 0 &&
+ descriptor.getGeoipClientOrigins() != null) {
+ long seconds = (descriptor.getPublishedMillis()
+ - descriptor.getGeoipStartTimeMillis()) / 1000L;
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ for (Map.Entry<String, Integer> e :
+ descriptor.getGeoipClientOrigins().entrySet()) {
+ String country = e.getKey();
+ double users = ((double) e.getValue() - 4) * 86400.0D
+ / ((double) seconds);
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
}
+ obs.put("zy", String.format("%.2f", allUsers));
+ this.bsfh.addObs(descriptor.getFingerprint(),
+ descriptor.getPublishedMillis(), obs);
}
- if (this.bsfh != null && platformLine != null &&
- platformLine.startsWith("platform Tor 0.2.2")) {
- String date = publishedLine.split(" ")[1];
- String time = publishedLine.split(" ")[2];
- this.bsfh.addZeroTwoTwoDescriptor(hashedIdentity, date, time);
+ if (descriptor.getBridgeStatsEndMillis() >= 0 &&
+ descriptor.getBridgeIps() != null) {
+ double allUsers = 0.0D;
+ Map<String, String> obs = new HashMap<String, String>();
+ for (Map.Entry<String, Integer> e :
+ descriptor.getBridgeIps().entrySet()) {
+ String country = e.getKey();
+ double users = (double) e.getValue() - 4;
+ allUsers += users;
+ obs.put(country, String.format("%.2f", users));
+ }
+ obs.put("zy", String.format("%.2f", allUsers));
+ this.bsfh.addObs(descriptor.getFingerprint(),
+ descriptor.getBridgeStatsEndMillis(), obs);
}
- } catch (IOException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
- } catch (ParseException e) {
- this.logger.log(Level.WARNING, "Could not parse bridge descriptor.",
- e);
- return;
+
}
}
}
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits