[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Avoid downloading vote tarballs in #2394.
commit dc4ab95ec8ec60e285fb715b7e94233ab5ac8e59
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Jul 13 21:28:32 2011 +0200
Avoid downloading vote tarballs in #2394.
---
task-2394/ParseDescriptors.java | 70 +++++++++++++++++--------------------
task-2394/README | 37 ++++++++++++++-----
task-2394/bandwidth-comparison.R | 8 ++--
3 files changed, 63 insertions(+), 52 deletions(-)
diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
index b0ba916..2197670 100644
--- a/task-2394/ParseDescriptors.java
+++ b/task-2394/ParseDescriptors.java
@@ -19,7 +19,7 @@ public class ParseDescriptors {
files.addAll(Arrays.asList(file.listFiles()));
} else if (filename.endsWith("-consensus")) {
consensuses.put(filename, file);
- } else if (filename.contains("-vote-")) {
+ } else if (filename.endsWith("-votes")) {
votes.put(filename, file);
} else if (filename.length() == 40) {
descriptors.put(filename, file);
@@ -55,51 +55,45 @@ public class ParseDescriptors {
System.out.println("Parsing consensus " + consensusFile.getName());
BufferedReader brC = new BufferedReader(new FileReader(
consensusFile));
- String lineC, validAfter = null, lastDirSource = null,
- lastRLine = null, lastSLine = null;
+ String lastRLine = null, lastSLine = null;
String consensusTimestamp = consensusFile.getName().substring(0,
"YYYY-MM-DD-hh-mm-ss".length());
+ String votesFilename = consensusTimestamp + "-votes";
Map<String, Map<String, String>> measuredBandwidthsByDirSource =
new HashMap<String, Map<String, String>>();
- while ((lineC = brC.readLine()) != null) {
-
- /* Start with parsing a consensus to find out which votes it
- * contains. */
- if (lineC.startsWith("valid-after ")) {
- validAfter = lineC.substring("valid-after ".length());
- } else if (lineC.startsWith("dir-source ")) {
- lastDirSource = lineC.split(" ")[2];
- } else if (lineC.startsWith("vote-digest ") &&
- bandwidthAuthorities.containsKey(lastDirSource)) {
- String voteDigest = lineC.substring("vote-digest ".length());
- String voteFilename = consensusTimestamp + "-vote-"
- + lastDirSource + "-" + voteDigest;
- if (votes.containsKey(voteFilename)) {
- /* Parse votes first and extract measured bandwidths. */
- Map<String, String> measuredBandwidths =
- new HashMap<String, String>();
- measuredBandwidthsByDirSource.put(lastDirSource,
+ /* Parse votes first, if we have them, and extract measured
+ * bandwidths. */
+ if (votes.containsKey(votesFilename)) {
+ BufferedReader brV = new BufferedReader(new FileReader(
+ votes.get(votesFilename)));
+ String lineV;
+ Map<String, String> measuredBandwidths = null;
+ while ((lineV = brV.readLine()) != null) {
+ if (lineV.startsWith("dir-source ")) {
+ String dirSource = lineV.split(" ")[2];
+ measuredBandwidths = new HashMap<String, String>();
+ measuredBandwidthsByDirSource.put(dirSource,
measuredBandwidths);
- BufferedReader brV = new BufferedReader(new FileReader(
- votes.get(voteFilename)));
- String lineV;
- while ((lineV = brV.readLine()) != null) {
- if (lineV.startsWith("r ")) {
- lastRLine = lineV;
- } else if (lineV.startsWith("w ") &&
- lineV.contains(" Measured=")) {
- String fingerprint = Hex.encodeHexString(Base64.
- decodeBase64(lastRLine.split(" ")[2] + "="));
- String measuredBandwidth = lineV.substring(lineV.indexOf(
- " Measured=") + " Measured=".length()).split(" ")[0];
- measuredBandwidths.put(fingerprint, measuredBandwidth);
- }
- }
- brV.close();
+ } else if (lineV.startsWith("r ")) {
+ lastRLine = lineV;
+ } else if (lineV.startsWith("w ") &&
+ lineV.contains(" Measured=")) {
+ String fingerprint = Hex.encodeHexString(Base64.
+ decodeBase64(lastRLine.split(" ")[2] + "="));
+ String measuredBandwidth = lineV.substring(lineV.indexOf(
+ " Measured=") + " Measured=".length()).split(" ")[0];
+ measuredBandwidths.put(fingerprint, measuredBandwidth);
}
+ }
+ brV.close();
+ }
- /* Parse r, s, and w lines from the consensus. */
+ /* Parse r, s, and w lines from the consensus. */
+ String lineC, validAfter = null;
+ while ((lineC = brC.readLine()) != null) {
+ if (lineC.startsWith("valid-after ")) {
+ validAfter = lineC.substring("valid-after ".length());
} else if (lineC.startsWith("r ")) {
lastRLine = lineC;
} else if (lineC.startsWith("s ")) {
diff --git a/task-2394/README b/task-2394/README
index d74d962..5cb47f6 100644
--- a/task-2394/README
+++ b/task-2394/README
@@ -1,17 +1,26 @@
Visualize self-reported vs. measured bandwidth of relays
========================================================
- - Download consensuses, votes, and server descriptors from the metrics
- website. Be sure to download the server descriptors of the month
- preceding the consensuses and votes, too. For example, you could
- download these files:
+ - Download consensus and votes from the metrics website, e.g.,
- https://metrics.torproject.org/data/votes-2011-01.tar.bz2
- https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2
- https://metrics.torproject.org/data/server-descriptors-2010-12.tar.bz2
- https://metrics.torproject.org/data/server-descriptors-2011-01.tar.bz2
+ https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00
+ https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00
- - Extract the tarballs into subdirectory descriptors/.
+ - Also download the server descriptor archives containing the referenced
+ server descriptors from the metrics website. If the consensus was
+ published on the 3rd of a month or later, it's sufficient to download a
+ single tarball. Otherwise, download both tarballs for the current and
+ previous month:
+
+ https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2
+
+ - Extract the server descriptor tarball(s) into the subdirectory
+ descriptors/. Also copy the consensus and votes there. With the URLs
+ stated above, the directory would contain these files:
+
+ 2011-07-13-05-00-00-consensus
+ 2011-07-13-05-00-00-votes
+ server-descriptors-2011-07
- Download Apache Commons Codec 1.4 or higher and put it in this
directory.
@@ -22,6 +31,14 @@ Visualize self-reported vs. measured bandwidth of relays
- Run the Java class, e.g.,
$ java -cp .:commons-codec-1.4.jar ParseDescriptors
- - Once the Java application is done, you'll find a file
+ Once the Java application is done, you'll find a file
bandwidth-comparison.csv in this directory.
+ - Plot the data:
+
+ $ R --slave -f bandwidth-comparison.R
+
+ (Edit bandwidth-comparison.R to color urras or one of the other
+ directory authorities specially by swapping in the commented-out lines
+ containing "purple", then re-run the last command above.)
+
diff --git a/task-2394/bandwidth-comparison.R b/task-2394/bandwidth-comparison.R
index 77a5842..356464b 100644
--- a/task-2394/bandwidth-comparison.R
+++ b/task-2394/bandwidth-comparison.R
@@ -32,8 +32,6 @@ opts(title = "Ratio between measured and self-reported relay bandwidth",
ggsave(filename = "bandwidth-comparison-relays.png",
width = 8, height = 5, dpi = 150)
-stopit
-
# Plot ECDFs to compare consensus to votes
cdf_relays_category_votes <- function(data, category) {
d <- data[data$category == category & data$descriptorbandwidth > 0, ]
@@ -81,8 +79,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth",
scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
formatter = "percent") +
scale_colour_manual("",
- breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+ breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"),
values = c("black", rep(alpha("black", 0.25), 4))) +
+# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) +
geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
opts(title = paste("Measured vs. self-reported bandwidth ratios in",
"consensus and votes\n"), legend.position = "none")
@@ -141,8 +140,9 @@ scale_x_log10("\nRatio of measured by self-reported bandwidth",
scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1),
formatter = "percent") +
scale_colour_manual("",
- breaks = c("consensus", "ides", "urras", "moria1", "gabelmoo"),
+ breaks = c("consensus", "urras", "ides", "moria1", "gabelmoo"),
values = c("black", rep(alpha("black", 0.25), 4))) +
+# values = c("black", alpha("purple", 0.5), rep(alpha("black", 0.25), 3))) +
geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
opts(title = paste("Measured vs. self-reported bandwidth ratios in",
"consensus and votes\n"), legend.position = "none")
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits