[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Avoid downloading server descriptor tarballs in #2394.
commit 7b75c704f5dad573ad6ba4f0a04cb8599b4cdb7a
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Thu Jul 21 16:01:14 2011 +0200
Avoid downloading server descriptor tarballs in #2394.
---
task-2394/ParseDescriptors.java | 133 ++++++++++++++++++++++-----------------
task-2394/README | 17 ++----
2 files changed, 80 insertions(+), 70 deletions(-)
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
index 2197670..641518d 100644
--- a/task-2394/ParseDescriptors.java
+++ b/task-2394/ParseDescriptors.java
@@ -21,13 +21,13 @@ public class ParseDescriptors {
consensuses.put(filename, file);
} else if (filename.endsWith("-votes")) {
votes.put(filename, file);
- } else if (filename.length() == 40) {
+ } else if (filename.endsWith("-descriptors")) {
descriptors.put(filename, file);
}
}
System.out.println("We found " + consensuses.size()
- + " consensuses, " + votes.size() + " votes, and "
- + descriptors.size() + " server descriptors.");
+ + " consensus files, " + votes.size() + " vote files, and "
+ + descriptors.size() + " server descriptor files.");
/* Parse consensuses in an outer loop and the referenced votes and
* descriptors in inner loops. Write the results to disk as soon as
@@ -50,7 +50,6 @@ public class ParseDescriptors {
bw.write("," + bandwidthAuthority + "bandwidth");
}
bw.write("\n");
- Map<String, String> parsedDescriptors = new HashMap<String, String>();
for (File consensusFile : consensuses.values()) {
System.out.println("Parsing consensus " + consensusFile.getName());
BufferedReader brC = new BufferedReader(new FileReader(
@@ -58,12 +57,12 @@ public class ParseDescriptors {
String lastRLine = null, lastSLine = null;
String consensusTimestamp = consensusFile.getName().substring(0,
"YYYY-MM-DD-hh-mm-ss".length());
- String votesFilename = consensusTimestamp + "-votes";
Map<String, Map<String, String>> measuredBandwidthsByDirSource =
new HashMap<String, Map<String, String>>();
/* Parse votes first, if we have them, and extract measured
* bandwidths. */
+ String votesFilename = consensusTimestamp + "-votes";
if (votes.containsKey(votesFilename)) {
BufferedReader brV = new BufferedReader(new FileReader(
votes.get(votesFilename)));
@@ -89,6 +88,74 @@ public class ParseDescriptors {
brV.close();
}
+ /* Parse referenced server descriptors to learn about exit policies
+ * and reported bandwidths. */
+ String descriptorsFilename = consensusTimestamp + "-descriptors";
+ Map<String, String> parsedDescriptors =
+ new HashMap<String, String>();
+ if (descriptors.containsKey(descriptorsFilename)) {
+ BufferedReader brD = new BufferedReader(new FileReader(
+ descriptors.get(descriptorsFilename)));
+ Set<String> defaultRejects = new HashSet<String>();
+ /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
+ * in the default exit policy again (and therefore removed
+ * from the default reject lines). */
+ Set<String> optionalRejects = new HashSet<String>();
+ String lineD, address = null, fingerprint = null,
+ descriptorBandwidth = null;
+ boolean defaultPolicy = false, comparePolicies = true;
+ while ((lineD = brD.readLine()) != null) {
+ if (lineD.startsWith("router ")) {
+ address = lineD.split(" ")[2];
+ defaultRejects.clear();
+ defaultRejects.addAll(Arrays.asList(("0.0.0.0/8:*,"
+ + "169.254.0.0/16:*,127.0.0.0/8:*,192.168.0.0/16:*,"
+ + "10.0.0.0/8:*,172.16.0.0/12:*,$IP:*,*:25,*:119,"
+ + "*:135-139,*:445,*:563,*:1214,*:4661-4666,*:6346-6429,"
+ + "*:6699,*:6881-6999").split(",")));
+ optionalRejects.clear();
+ optionalRejects.addAll(Arrays.asList(
+ "*:465,*:587".split(",")));
+ fingerprint = null;
+ descriptorBandwidth = null;
+ defaultPolicy = false;
+ comparePolicies = true;
+ } else if (lineD.startsWith("opt fingerprint ") ||
+ lineD.startsWith("fingerprint ")) {
+ fingerprint = lineD.substring(lineD.startsWith("opt ") ?
+ "opt fingerprint".length() : "fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (lineD.startsWith("bandwidth ")) {
+ descriptorBandwidth = lineD.split(" ")[3];
+ } else if (lineD.startsWith("reject ") && comparePolicies) {
+ String rejectPattern = lineD.substring("reject ".
+ length());
+ if (defaultRejects.contains(rejectPattern)) {
+ defaultRejects.remove(rejectPattern);
+ } else if (optionalRejects.contains(rejectPattern)) {
+ optionalRejects.remove(rejectPattern);
+ } else if (rejectPattern.equals(address + ":*")) {
+ defaultRejects.remove("$IP:*");
+ } else {
+ comparePolicies = false;
+ }
+ } else if (lineD.startsWith("accept ") && comparePolicies) {
+ if (defaultRejects.isEmpty() &&
+ lineD.equals("accept *:*")) {
+ defaultPolicy = true;
+ }
+ comparePolicies = false;
+ } else if (lineD.equals("router-signature")) {
+ if (address != null && fingerprint != null &&
+ descriptorBandwidth != null) {
+ parsedDescriptors.put(fingerprint, descriptorBandwidth + ","
+ + (defaultPolicy ? "1" : "0"));
+ }
+ }
+ }
+ brD.close();
+ }
+
/* Parse r, s, and w lines from the consensus. */
String lineC, validAfter = null;
while ((lineC = brC.readLine()) != null) {
@@ -110,64 +177,14 @@ public class ParseDescriptors {
String consensusBandwidth = lineC.substring(lineC.indexOf(
" Bandwidth=") + " Bandwidth=".length()).split(" ")[0];
- /* Parse the referenced server descriptor (if we haven't done so
- * before) to learn about the relay's exit policy and reported
- * bandwidth. */
+ /* Look up whether we parsed this descriptor before. */
boolean parsedDescriptor = false, defaultPolicy = false;
String descriptorBandwidth = null;
- if (parsedDescriptors.containsKey(descriptor)) {
- String parseResults = parsedDescriptors.get(descriptor);
+ if (parsedDescriptors.containsKey(fingerprint)) {
+ String parseResults = parsedDescriptors.get(fingerprint);
parsedDescriptor = true;
defaultPolicy = parseResults.endsWith("1");
descriptorBandwidth = parseResults.split(",")[0];
- } else if (descriptors.containsKey(descriptor)) {
- parsedDescriptor = true;
- BufferedReader brD = new BufferedReader(new FileReader(
- descriptors.get(descriptor)));
- Set<String> defaultRejects = new HashSet<String>(
- Arrays.asList(("0.0.0.0/8:*,169.254.0.0/16:*,"
- + "127.0.0.0/8:*,192.168.0.0/16:*,10.0.0.0/8:*,"
- + "172.16.0.0/12:*,$IP:*,*:25,*:119,*:135-139,*:445,"
- + "*:563,*:1214,*:4661-4666,*:6346-6429,*:6699,"
- + "*:6881-6999").split(",")));
- /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
- * in the default exit policy again (and therefore removed
- * from the default reject lines). */
- Set<String> optionalRejects = new HashSet<String>(
- Arrays.asList("*:465,*:587".split(",")));
- String lineD, address = null;
- while ((lineD = brD.readLine()) != null) {
- if (lineD.startsWith("router ")) {
- address = lineD.split(" ")[2];
- } else if (lineD.startsWith("bandwidth ")) {
- descriptorBandwidth = lineD.split(" ")[3];
- } else if (lineD.startsWith("reject ")) {
- String rejectPattern = lineD.substring("reject ".
- length());
- if (defaultRejects.contains(rejectPattern)) {
- defaultRejects.remove(rejectPattern);
- } else if (optionalRejects.contains(rejectPattern)) {
- optionalRejects.remove(rejectPattern);
- } else if (rejectPattern.equals(address + ":*")) {
- defaultRejects.remove("$IP:*");
- } else {
- break;
- }
- } else if (lineD.startsWith("accept ")) {
- if (defaultRejects.isEmpty() &&
- lineD.equals("accept *:*")) {
- defaultPolicy = true;
- }
- break;
- }
- }
- brD.close();
- parsedDescriptors.put(descriptor, descriptorBandwidth + ","
- + (defaultPolicy ? "1" : "0"));
- } else {
- System.out.println("We're missing descriptor " + descriptor
- + ". Please make sure that all referenced server "
- + "descriptors are available. Continuing anyway.");
}
/* Write everything we know about this relay to disk. */
diff --git a/task-2394/README b/task-2394/README
index 5cb47f6..714853e 100644
--- a/task-2394/README
+++ b/task-2394/README
@@ -1,26 +1,19 @@
Visualize self-reported vs. measured bandwidth of relays
========================================================
- - Download consensus and votes from the metrics website, e.g.,
+ - Download a single consensus and all referenced votes and server
+ descriptors from the metrics website, e.g.,
https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00
https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00
+ https://metrics.torproject.org/serverdesc?valid-after=2011-07-13-05-00-00
- - Also download the server descriptor archives containing the referenced
- server descriptors from the metrics website. If the consensus was
- published on the 3rd of a month or later, it's sufficient to download a
- single tarball. Otherwise, download both tarballs for the current and
- previous month:
-
- https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2
-
- - Extract the server descriptor tarball(s) into the subdirectory
- descriptors/. Also copy the consensus and votes there. With the URLs
+ - Put all files into a new subdirectory descriptors/. With the URLs
stated above, the directory would contain these files:
2011-07-13-05-00-00-consensus
2011-07-13-05-00-00-votes
- server-descriptors-2011-07
+ 2011-07-13-05-00-00-descriptors
- Download Apache Commons Codec 1.4 or higher and put it in this
directory.
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits