[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Add bridge pool assignment file parser for #2680.
commit 1e8dbd3857c58e26f57973da4d64170eae0e1be6
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Mon Mar 14 14:15:33 2011 +0100
Add bridge pool assignment file parser for #2680.
---
task-2680/.gitignore | 2 +
task-2680/ProcessSanitizedAssignments.java | 102 ++++++++++++++++++++++++++++
task-2680/README | 40 ++++++++++-
task-2680/verify.R | 6 ++
4 files changed, 147 insertions(+), 3 deletions(-)
diff --git a/task-2680/.gitignore b/task-2680/.gitignore
index 6378a79..b394fe6 100644
--- a/task-2680/.gitignore
+++ b/task-2680/.gitignore
@@ -4,4 +4,6 @@ bridge-descriptors/
commons-codec-1.4.jar
consensuses/
*.tar.bz2
+*.swp
+bridge-pool-assignments/
diff --git a/task-2680/ProcessSanitizedAssignments.java b/task-2680/ProcessSanitizedAssignments.java
new file mode 100644
index 0000000..9289aa1
--- /dev/null
+++ b/task-2680/ProcessSanitizedAssignments.java
@@ -0,0 +1,102 @@
+import java.io.*;
+import java.util.*;
+
+/**
+ * Converts sanitized bridge pool assignment files into assignments.csv.
+ *
+ * Usage: java ProcessSanitizedAssignments &lt;dir&gt;
+ *
+ * Every regular file found below the given directory is parsed, and one
+ * CSV line per bridge entry is written to assignments.csv in the current
+ * working directory.
+ */
+public class ProcessSanitizedAssignments {
+
+  /** Prefix of the line that carries the assignment time of a file. */
+  private static final String HEADER = "bridge-pool-assignment ";
+
+  /**
+   * Parses all assignment files below args[0] and writes assignments.csv.
+   *
+   * @param args exactly one element: an existing file or directory that
+   *     contains bridge pool assignment files
+   * @throws IOException if an input file cannot be read or
+   *     assignments.csv cannot be written
+   */
+  public static void main(String[] args) throws IOException {
+
+    /* Validate command-line arguments. */
+    if (args.length != 1 || !new File(args[0]).exists()) {
+      System.out.println("Usage: java ProcessSanitizedAssignments <dir>");
+      System.exit(1);
+    }
+
+    /* Find all files that we should parse. */
+    System.out.println("Creating list of files we should parse.");
+    SortedMap<String, File> assignments = findFiles(new File(args[0]));
+    System.out.println("We found " + assignments.size() + " bridge pool "
+        + "assignment files.");
+
+    /* Parse assignments. */
+    if (!assignments.isEmpty()) {
+      System.out.println("Parsing bridge pool assignment files.");
+      /* The output line count starts at 1 for the CSV header. */
+      int parsedAssignments = 0, totalAssignments = assignments.size(),
+          writtenOutputLines = 1, tenthsReported = 0;
+      long started = System.currentTimeMillis();
+      BufferedWriter bw = new BufferedWriter(new FileWriter(
+          "assignments.csv"));
+      try {
+        bw.write("assignment,fingerprint,type,ring,port,flag,bucket\n");
+        for (File file : assignments.values()) {
+          writtenOutputLines += parseFile(file, bw);
+          parsedAssignments++;
+
+          /* Report progress each time another tenth of the files is
+           * done. Comparing tenths, rather than taking the remainder by
+           * (total / 10), avoids a division by zero for fewer than ten
+           * files and never reports more than 100%. */
+          int tenthsDone = (int) (10L * parsedAssignments
+              / totalAssignments);
+          if (tenthsDone > tenthsReported) {
+            tenthsReported = tenthsDone;
+            double fractionDone = (double) parsedAssignments
+                / (double) totalAssignments;
+            double millisLeft = (double) (System.currentTimeMillis()
+                - started) * (1.0D - fractionDone) / fractionDone;
+            System.out.println(" " + tenthsDone + "0% done, "
+                + ((long) millisLeft / 1000L) + " seconds left.");
+          }
+        }
+      } finally {
+        /* Close (and thereby flush) the output even if parsing fails. */
+        bw.close();
+      }
+      System.out.println("Parsed " + parsedAssignments + " bridge pool "
+          + "assignment files and wrote " + writtenOutputLines + " lines "
+          + "to assignments.csv.");
+    }
+
+    /* This is it. */
+    System.out.println("Terminating.");
+  }
+
+  /**
+   * Collects all non-directory files at or below root, keyed by file
+   * name. Files that share a name in different directories collapse into
+   * a single entry, so this is somewhat fragile.
+   *
+   * @param root file or directory to start from
+   * @return files to parse, sorted by file name
+   */
+  private static SortedMap<String, File> findFiles(File root) {
+    SortedMap<String, File> found = new TreeMap<String, File>();
+    Deque<File> pending = new ArrayDeque<File>();
+    pending.push(root);
+    while (!pending.isEmpty()) {
+      File file = pending.pop();
+      if (file.isDirectory()) {
+        /* listFiles() returns null if the directory cannot be read. */
+        File[] children = file.listFiles();
+        if (children != null) {
+          for (File child : children) {
+            pending.push(child);
+          }
+        }
+      } else {
+        found.put(file.getName(), file);
+      }
+    }
+    return found;
+  }
+
+  /**
+   * Parses a single assignment file and appends one CSV line per bridge
+   * entry to bw. Blank lines are skipped.
+   *
+   * @param file assignment file to read
+   * @param bw writer for assignments.csv
+   * @return number of CSV lines written for this file
+   * @throws IOException if reading or writing fails
+   */
+  private static int parseFile(File file, BufferedWriter bw)
+      throws IOException {
+    int written = 0;
+    BufferedReader br = new BufferedReader(new FileReader(file));
+    try {
+      String line, assignmentTime = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith(HEADER)) {
+          assignmentTime = line.substring(HEADER.length());
+        } else if (line.trim().length() > 0) {
+          bw.write(assignmentTime + "," + toCsvFields(line) + "\n");
+          written++;
+        }
+      }
+    } finally {
+      br.close();
+    }
+    return written;
+  }
+
+  /**
+   * Turns a bridge entry "fingerprint type [key=value ...]" into the CSV
+   * fields "fingerprint,type,ring,port,flag,bucket". Keys that are not
+   * present are written as "NA". Exits the JVM with status 1 on a
+   * malformed entry or an unknown key.
+   *
+   * @param line non-blank bridge entry line
+   * @return comma-separated fields without the assignment time
+   */
+  private static String toCsvFields(String line) {
+    String[] parts = line.split(" ");
+    if (parts.length < 2) {
+      exitWithError("Malformed line '" + line
+          + "'. Please check. Exiting.");
+    }
+    String ring = "NA", port = "NA", flag = "NA", bucket = "NA";
+    for (int i = 2; i < parts.length; i++) {
+      /* Limit 2 keeps any '=' inside the value intact. */
+      String[] keyValue = parts[i].split("=", 2);
+      if (keyValue.length < 2) {
+        exitWithError("Malformed line '" + line
+            + "'. Please check. Exiting.");
+      } else if (keyValue[0].equals("ring")) {
+        ring = keyValue[1];
+      } else if (keyValue[0].equals("port")) {
+        port = keyValue[1];
+      } else if (keyValue[0].equals("flag")) {
+        flag = keyValue[1];
+      } else if (keyValue[0].equals("bucket")) {
+        bucket = keyValue[1];
+      } else {
+        exitWithError("Unknown keyword in line '" + line
+            + "'. Please check. Exiting.");
+      }
+    }
+    return parts[0] + "," + parts[1] + "," + ring + "," + port + ","
+        + flag + "," + bucket;
+  }
+
+  /** Prints message to stdout and terminates the JVM with status 1. */
+  private static void exitWithError(String message) {
+    System.out.println(message);
+    System.exit(1);
+  }
+}
+
diff --git a/task-2680/README b/task-2680/README
index a00856f..65d8b85 100644
--- a/task-2680/README
+++ b/task-2680/README
@@ -6,9 +6,9 @@ This ticket contains Java and R code to
This README has a separate section for each Java or R code snippet.
-The Java applications produce three output formats containing bridge
-descriptors, bridge status lines, and hashed relay identities. The data
-formats are described below.
+The Java applications produce four output formats containing bridge
+descriptors, bridge status lines, bridge pool assignments, and hashed
+relay identities. The data formats are described below.
--------------------------------------------------------------------------
@@ -33,6 +33,23 @@ ProcessSanitizedBridges.java
--------------------------------------------------------------------------
+ProcessSanitizedAssignments.java
+
+ - Download sanitized bridge pool assignments from the metrics website,
+ e.g., https://metrics.torproject.org/data/bridge-pool-assignments-2011-01.tar.bz2
+ and extract them in a local directory, e.g., bridge-pool-assignments/.
+
+ - Compile the Java class, e.g.,
+ $ javac ProcessSanitizedAssignments.java
+
+ - Run the Java class, e.g.,
+ $ java ProcessSanitizedAssignments bridge-pool-assignments/
+
+ - Once the Java application is done, you'll find a file assignments.csv
+ in this directory.
+
+--------------------------------------------------------------------------
+
ProcessRelayConsensuses.java
- Download v3 relay consensuses from the metrics website, e.g.,
@@ -130,6 +147,23 @@ The columns in statuses.csv are:
--------------------------------------------------------------------------
+assignments.csv
+
+The assignments.csv file contains one line per running bridge and bridge
+pool assignment, listing the ring, subrings, and bucket BridgeDB put it in.
+
+The columns in assignments.csv are:
+
+ - assignment: ISO-formatted bridge pool assignment time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - type: Name of the distributor: "https", "email", or "unallocated"
+ - ring: Ring number, only for distributor "https"; "NA" otherwise
+ - port: Port subring, or "NA" if none is given
+ - flag: Flag subring, or "NA" if none is given
+ - bucket: File bucket, only for distributor "unallocated"; "NA" otherwise
+
+--------------------------------------------------------------------------
+
relays.csv
The relays.csv file contains SHA-1 hashes of identity fingerprints of
diff --git a/task-2680/verify.R b/task-2680/verify.R
index 63ef233..241a196 100644
--- a/task-2680/verify.R
+++ b/task-2680/verify.R
@@ -25,3 +25,9 @@ if (file.exists("relays.csv")) {
summary(as.POSIXct(r$consensus))
}
+# If assignments.csv exists in the working directory, read it (keeping
+# strings as character vectors) and summarize the assignment column after
+# converting it to POSIXct date-times.
+if (file.exists("assignments.csv")) {
+ cat("Verifying assignments.csv. This may take a while.\n")
+ r <- read.csv("assignments.csv", stringsAsFactors = FALSE)
+ summary(as.POSIXct(r$assignment))
+}
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits