[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Add some code for #2394.
commit a3a8b86287695e74e76e7f3dfe2a6443613cdfcc
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Thu Mar 17 18:11:07 2011 +0100
Add some code for #2394.
---
task-2394/.gitignore | 6 +
task-2394/ParseDescriptors.java | 216 +++++++++++++++++++++++++++++++++++++++
task-2394/README | 27 +++++
3 files changed, 249 insertions(+), 0 deletions(-)
diff --git a/task-2394/.gitignore b/task-2394/.gitignore
new file mode 100644
index 0000000..bef5dec
--- /dev/null
+++ b/task-2394/.gitignore
@@ -0,0 +1,6 @@
+*.class
+descriptors/
+*.pdf
+*.csv
+*.jar
+
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
new file mode 100644
index 0000000..b0ba916
--- /dev/null
+++ b/task-2394/ParseDescriptors.java
@@ -0,0 +1,216 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+
+public class ParseDescriptors { // Joins consensus, vote, and descriptor bandwidth values into one CSV file.
+ public static void main(String[] args) throws IOException { // args is ignored; all paths are hard-coded
+
+ /* Find all files that we should parse and distinguish between
+ * consensuses, votes, and server descriptors. */
+ SortedMap<String, File> consensuses = new TreeMap<String, File>();
+ SortedMap<String, File> descriptors = new TreeMap<String, File>();
+ SortedMap<String, File> votes = new TreeMap<String, File>();
+ Stack<File> files = new Stack<File>(); // work list for the iterative directory walk
+ files.add(new File("descriptors"));
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ String filename = file.getName();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles())); // NOTE(review): listFiles() may return null on an I/O error
+ } else if (filename.endsWith("-consensus")) {
+ consensuses.put(filename, file);
+ } else if (filename.contains("-vote-")) {
+ votes.put(filename, file);
+ } else if (filename.length() == 40) { // presumably a 40-character hex digest -- TODO confirm
+ descriptors.put(filename, file);
+ } // files matching none of the patterns are ignored
+ }
+ System.out.println("We found " + consensuses.size()
+ + " consensuses, " + votes.size() + " votes, and "
+ + descriptors.size() + " server descriptors.");
+
+ /* Parse consensuses in an outer loop and the referenced votes and
+ * descriptors in inner loops. Write the results to disk as soon as
+ * we can to avoid keeping many things in memory. */
+ SortedMap<String, String> bandwidthAuthorities = // third field of a "dir-source" line -> name used in the CSV header
+ new TreeMap<String, String>();
+ bandwidthAuthorities.put("27B6B5996C426270A5C95488AA5BCEB6BCC86956",
+ "ides");
+ bandwidthAuthorities.put("80550987E1D626E3EBA5E5E75A458DE0626D088C",
+ "urras");
+ bandwidthAuthorities.put("D586D18309DED4CD6D57C18FDB97EFA96D330566",
+ "moria1");
+ bandwidthAuthorities.put("ED03BB616EB2F60BEC80151114BB25CEF515B226",
+ "gabelmoo");
+ BufferedWriter bw = new BufferedWriter(new FileWriter( // NOTE(review): not closed if an exception propagates
+ "bandwidth-comparison.csv"));
+ bw.write("validafter,fingerprint,nickname,category,"
+ + "descriptorbandwidth,consensusbandwidth");
+ for (String bandwidthAuthority : bandwidthAuthorities.values()) { // sorted by key, like the keySet() loop that fills these columns
+ bw.write("," + bandwidthAuthority + "bandwidth");
+ }
+ bw.write("\n");
+ Map<String, String> parsedDescriptors = new HashMap<String, String>(); // cache: descriptor key -> "<bandwidth>,<0|1>"
+ for (File consensusFile : consensuses.values()) {
+ System.out.println("Parsing consensus " + consensusFile.getName());
+ BufferedReader brC = new BufferedReader(new FileReader(
+ consensusFile));
+ String lineC, validAfter = null, lastDirSource = null,
+ lastRLine = null, lastSLine = null; // most recent "r"/"s" lines seen
+ String consensusTimestamp = consensusFile.getName().substring(0,
+ "YYYY-MM-DD-hh-mm-ss".length()); // i.e. the first 19 characters of the file name
+ Map<String, Map<String, String>> measuredBandwidthsByDirSource = // dir-source key -> (relay fingerprint -> Measured value)
+ new HashMap<String, Map<String, String>>();
+ while ((lineC = brC.readLine()) != null) {
+
+ /* Start with parsing a consensus to find out which votes it
+ * contains. */
+ if (lineC.startsWith("valid-after ")) {
+ validAfter = lineC.substring("valid-after ".length());
+ } else if (lineC.startsWith("dir-source ")) {
+ lastDirSource = lineC.split(" ")[2];
+ } else if (lineC.startsWith("vote-digest ") &&
+ bandwidthAuthorities.containsKey(lastDirSource)) {
+ String voteDigest = lineC.substring("vote-digest ".length());
+ String voteFilename = consensusTimestamp + "-vote-"
+ + lastDirSource + "-" + voteDigest;
+ if (votes.containsKey(voteFilename)) { // a vote that is not on disk is skipped without a message
+
+ /* Parse votes first and extract measured bandwidths. */
+ Map<String, String> measuredBandwidths =
+ new HashMap<String, String>();
+ measuredBandwidthsByDirSource.put(lastDirSource,
+ measuredBandwidths);
+ BufferedReader brV = new BufferedReader(new FileReader( // NOTE(review): not closed if reading throws
+ votes.get(voteFilename)));
+ String lineV;
+ while ((lineV = brV.readLine()) != null) {
+ if (lineV.startsWith("r ")) {
+ lastRLine = lineV; // same variable the consensus "r" branch assigns
+ } else if (lineV.startsWith("w ") &&
+ lineV.contains(" Measured=")) {
+ String fingerprint = Hex.encodeHexString(Base64.
+ decodeBase64(lastRLine.split(" ")[2] + "=")); // "=" presumably restores stripped Base64 padding -- TODO confirm
+ String measuredBandwidth = lineV.substring(lineV.indexOf(
+ " Measured=") + " Measured=".length()).split(" ")[0];
+ measuredBandwidths.put(fingerprint, measuredBandwidth);
+ }
+ }
+ brV.close();
+ }
+
+ /* Parse r, s, and w lines from the consensus. */
+ } else if (lineC.startsWith("r ")) {
+ lastRLine = lineC;
+ } else if (lineC.startsWith("s ")) {
+ lastSLine = lineC;
+ } else if (lineC.startsWith("w ")) {
+ String[] parts = lastRLine.split(" "); // NOTE(review): throws NullPointerException if no "r" line came first
+ String nickname = parts[1];
+ String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "="));
+ String descriptor = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "="));
+ boolean exitFlag = lastSLine.contains(" Exit"); // NOTE(review): lastSLine is null until the first "s" line
+ boolean guardFlag = lastSLine.contains(" Guard"); // substring match on the most recent "s" line
+ String consensusBandwidth = lineC.substring(lineC.indexOf(
+ " Bandwidth=") + " Bandwidth=".length()).split(" ")[0]; // token after " Bandwidth=" up to the next space
+
+ /* Parse the referenced server descriptor (if we haven't done so
+ * before) to learn about the relay's exit policy and reported
+ * bandwidth. */
+ boolean parsedDescriptor = false, defaultPolicy = false;
+ String descriptorBandwidth = null;
+ if (parsedDescriptors.containsKey(descriptor)) {
+ String parseResults = parsedDescriptors.get(descriptor);
+ parsedDescriptor = true;
+ defaultPolicy = parseResults.endsWith("1"); // cached value ends in "1" for a default policy, "0" otherwise
+ descriptorBandwidth = parseResults.split(",")[0]; // the text "null" if no bandwidth line was found when caching
+ } else if (descriptors.containsKey(descriptor)) {
+ parsedDescriptor = true;
+ BufferedReader brD = new BufferedReader(new FileReader(
+ descriptors.get(descriptor)));
+ Set<String> defaultRejects = new HashSet<String>( // patterns that must all be seen before "accept *:*"
+ Arrays.asList(("0.0.0.0/8:*,169.254.0.0/16:*,"
+ + "127.0.0.0/8:*,192.168.0.0/16:*,10.0.0.0/8:*,"
+ + "172.16.0.0/12:*,$IP:*,*:25,*:119,*:135-139,*:445,"
+ + "*:563,*:1214,*:4661-4666,*:6346-6429,*:6699,"
+ + "*:6881-6999").split(",")));
+ /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
+ * in the default exit policy again (and therefore removed
+ * from the default reject lines). */
+ Set<String> optionalRejects = new HashSet<String>( // tolerated if present, never required
+ Arrays.asList("*:465,*:587".split(",")));
+ String lineD, address = null;
+ while ((lineD = brD.readLine()) != null) {
+ if (lineD.startsWith("router ")) {
+ address = lineD.split(" ")[2]; // third field of the "router" line
+ } else if (lineD.startsWith("bandwidth ")) {
+ descriptorBandwidth = lineD.split(" ")[3]; // fourth field; presumably the observed bandwidth -- TODO confirm
+ } else if (lineD.startsWith("reject ")) {
+ String rejectPattern = lineD.substring("reject ".
+ length());
+ if (defaultRejects.contains(rejectPattern)) {
+ defaultRejects.remove(rejectPattern);
+ } else if (optionalRejects.contains(rejectPattern)) {
+ optionalRejects.remove(rejectPattern);
+ } else if (rejectPattern.equals(address + ":*")) {
+ defaultRejects.remove("$IP:*"); // the relay's own address stands in for "$IP"
+ } else {
+ break; // any other reject line: policy is not the default, stop reading
+ }
+ } else if (lineD.startsWith("accept ")) {
+ if (defaultRejects.isEmpty() &&
+ lineD.equals("accept *:*")) {
+ defaultPolicy = true;
+ }
+ break; // the first accept line ends the scan either way
+ }
+ }
+ brD.close();
+ parsedDescriptors.put(descriptor, descriptorBandwidth + ","
+ + (defaultPolicy ? "1" : "0"));
+ } else { // defaultPolicy stays false here, so exits fall into the non-default categories
+ System.out.println("We're missing descriptor " + descriptor
+ + ". Please make sure that all referenced server "
+ + "descriptors are available. Continuing anyway.");
+ }
+
+ /* Write everything we know about this relay to disk. */
+ String category = null; // the chain below covers every flag combination
+ if (guardFlag && exitFlag && defaultPolicy) {
+ category = "Guard & Exit (default policy)";
+ } else if (!guardFlag && exitFlag && defaultPolicy) {
+ category = "Exit (default policy)";
+ } else if (guardFlag && exitFlag && !defaultPolicy) {
+ category = "Guard & Exit (non-default policy)";
+ } else if (!guardFlag && exitFlag && !defaultPolicy) {
+ category = "Exit (non-default policy)";
+ } else if (guardFlag && !exitFlag) {
+ category = "Guard";
+ } else if (!guardFlag && !exitFlag) {
+ category = "Middle";
+ }
+ bw.write(validAfter + "," + fingerprint + "," + nickname + ","
+ + category + "," + (parsedDescriptor ? descriptorBandwidth
+ : "NA") + "," + consensusBandwidth);
+ for (String bandwidthAuthority :
+ bandwidthAuthorities.keySet()) {
+ if (measuredBandwidthsByDirSource.containsKey(
+ bandwidthAuthority) && measuredBandwidthsByDirSource.get(
+ bandwidthAuthority).containsKey(fingerprint)) {
+ bw.write("," + measuredBandwidthsByDirSource.get(
+ bandwidthAuthority).get(fingerprint));
+ } else {
+ bw.write(",NA"); // no parsed vote, or no Measured value for this relay
+ }
+ }
+ bw.write("\n");
+ }
+ }
+ brC.close();
+ }
+ bw.close();
+ }
+}
+
diff --git a/task-2394/README b/task-2394/README
new file mode 100644
index 0000000..d74d962
--- /dev/null
+++ b/task-2394/README
@@ -0,0 +1,27 @@
+Visualize self-reported vs. measured bandwidth of relays
+========================================================
+
+ - Download consensuses, votes, and server descriptors from the metrics
+ website. Be sure to download the server descriptors of the month
+ preceding the consensuses and votes, too. For example, you could
+ download these files:
+
+ https://metrics.torproject.org/data/votes-2011-01.tar.bz2
+ https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2
+ https://metrics.torproject.org/data/server-descriptors-2010-12.tar.bz2
+ https://metrics.torproject.org/data/server-descriptors-2011-01.tar.bz2
+
+ - Extract the tarballs into subdirectory descriptors/.
+
+ - Download Apache Commons Codec 1.4 or higher and put it in this
+ directory.
+
+ - Compile the Java class, e.g.,
+ $ javac -cp commons-codec-1.4.jar ParseDescriptors.java
+
+ - Run the Java class, e.g.,
+ $ java -cp .:commons-codec-1.4.jar ParseDescriptors
+
+ - Once the Java application is done, you'll find a file
+ bandwidth-comparison.csv in this directory.
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits