[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [metrics-tasks/master] Avoid downloading server descriptor tarballs in #2394.



commit 7b75c704f5dad573ad6ba4f0a04cb8599b4cdb7a
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Thu Jul 21 16:01:14 2011 +0200

    Avoid downloading server descriptor tarballs in #2394.
---
 task-2394/ParseDescriptors.java |  133 ++++++++++++++++++++++-----------------
 task-2394/README                |   17 ++----
 2 files changed, 80 insertions(+), 70 deletions(-)

diff --git a/task-2394/ParseDescriptors.java b/task-2394/ParseDescriptors.java
index 2197670..641518d 100644
--- a/task-2394/ParseDescriptors.java
+++ b/task-2394/ParseDescriptors.java
@@ -21,13 +21,13 @@ public class ParseDescriptors {
         consensuses.put(filename, file);
       } else if (filename.endsWith("-votes")) {
         votes.put(filename, file);
-      } else if (filename.length() == 40) {
+      } else if (filename.endsWith("-descriptors")) {
         descriptors.put(filename, file);
       }
     }
     System.out.println("We found " + consensuses.size()
-        + " consensuses, " + votes.size() + " votes, and "
-        + descriptors.size() + " server descriptors.");
+        + " consensus files, " + votes.size() + " vote files, and "
+        + descriptors.size() + " server descriptor files.");
 
     /* Parse consensuses in an outer loop and the referenced votes and
      * descriptors in inner loops.  Write the results to disk as soon as
@@ -50,7 +50,6 @@ public class ParseDescriptors {
       bw.write("," + bandwidthAuthority + "bandwidth");
     }
     bw.write("\n");
-    Map<String, String> parsedDescriptors = new HashMap<String, String>();
     for (File consensusFile : consensuses.values()) {
       System.out.println("Parsing consensus " + consensusFile.getName());
       BufferedReader brC = new BufferedReader(new FileReader(
@@ -58,12 +57,12 @@ public class ParseDescriptors {
       String lastRLine = null, lastSLine = null;
       String consensusTimestamp = consensusFile.getName().substring(0,
               "YYYY-MM-DD-hh-mm-ss".length());
-      String votesFilename = consensusTimestamp + "-votes";
       Map<String, Map<String, String>> measuredBandwidthsByDirSource =
           new HashMap<String, Map<String, String>>();
 
       /* Parse votes first, if we have them, and extract measured
        * bandwidths. */
+      String votesFilename = consensusTimestamp + "-votes";
       if (votes.containsKey(votesFilename)) {
         BufferedReader brV = new BufferedReader(new FileReader(
             votes.get(votesFilename)));
@@ -89,6 +88,74 @@ public class ParseDescriptors {
         brV.close();
       }
 
+      /* Parse referenced server descriptors to learn about exit policies
+       * and reported bandwidths. */
+      String descriptorsFilename = consensusTimestamp + "-descriptors";
+      Map<String, String> parsedDescriptors =
+          new HashMap<String, String>();
+      if (descriptors.containsKey(descriptorsFilename)) {
+        BufferedReader brD = new BufferedReader(new FileReader(
+            descriptors.get(descriptorsFilename)));
+        Set<String> defaultRejects = new HashSet<String>();
+        /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
+         * in the default exit policy again (and therefore removed
+         * from the default reject lines). */
+        Set<String> optionalRejects = new HashSet<String>();
+        String lineD, address = null, fingerprint = null,
+            descriptorBandwidth = null;
+        boolean defaultPolicy = false, comparePolicies = true;
+        while ((lineD = brD.readLine()) != null) {
+          if (lineD.startsWith("router ")) {
+            address = lineD.split(" ")[2];
+            defaultRejects.clear();
+            defaultRejects.addAll(Arrays.asList(("0.0.0.0/8:*,"
+                + "169.254.0.0/16:*,127.0.0.0/8:*,192.168.0.0/16:*,"
+                + "10.0.0.0/8:*,172.16.0.0/12:*,$IP:*,*:25,*:119,"
+                + "*:135-139,*:445,*:563,*:1214,*:4661-4666,*:6346-6429,"
+                + "*:6699,*:6881-6999").split(",")));
+            optionalRejects.clear();
+            optionalRejects.addAll(Arrays.asList(
+                "*:465,*:587".split(",")));
+            fingerprint = null;
+            descriptorBandwidth = null;
+            defaultPolicy = false;
+            comparePolicies = true;
+          } else if (lineD.startsWith("opt fingerprint ") ||
+              lineD.startsWith("fingerprint ")) {
+            fingerprint = lineD.substring(lineD.startsWith("opt ") ?
+                "opt fingerprint".length() : "fingerprint".length()).
+                replaceAll(" ", "").toLowerCase();
+          } else if (lineD.startsWith("bandwidth ")) {
+            descriptorBandwidth = lineD.split(" ")[3];
+          } else if (lineD.startsWith("reject ") && comparePolicies) {
+            String rejectPattern = lineD.substring("reject ".
+                length());
+            if (defaultRejects.contains(rejectPattern)) {
+              defaultRejects.remove(rejectPattern);
+            } else if (optionalRejects.contains(rejectPattern)) {
+              optionalRejects.remove(rejectPattern);
+            } else if (rejectPattern.equals(address + ":*")) {
+              defaultRejects.remove("$IP:*");
+            } else {
+              comparePolicies = false;
+            }
+          } else if (lineD.startsWith("accept ") && comparePolicies) {
+            if (defaultRejects.isEmpty() &&
+                lineD.equals("accept *:*")) {
+              defaultPolicy = true;
+            }
+            comparePolicies = false;
+          } else if (lineD.equals("router-signature")) {
+            if (address != null && fingerprint != null &&
+                descriptorBandwidth != null) {
+              parsedDescriptors.put(fingerprint, descriptorBandwidth + ","
+                  + (defaultPolicy ? "1" : "0"));
+            }
+          }
+        }
+        brD.close();
+      }
+
       /* Parse r, s, and w lines from the consensus. */
       String lineC, validAfter = null;
       while ((lineC = brC.readLine()) != null) {
@@ -110,64 +177,14 @@ public class ParseDescriptors {
           String consensusBandwidth = lineC.substring(lineC.indexOf(
               " Bandwidth=") + " Bandwidth=".length()).split(" ")[0];
 
-          /* Parse the referenced server descriptor (if we haven't done so
-           * before) to learn about the relay's exit policy and reported
-           * bandwidth. */
+          /* Look up whether we parsed this descriptor before. */
           boolean parsedDescriptor = false, defaultPolicy = false;
           String descriptorBandwidth = null;
-          if (parsedDescriptors.containsKey(descriptor)) {
-            String parseResults = parsedDescriptors.get(descriptor);
+          if (parsedDescriptors.containsKey(fingerprint)) {
+            String parseResults = parsedDescriptors.get(fingerprint);
             parsedDescriptor = true;
             defaultPolicy = parseResults.endsWith("1");
             descriptorBandwidth = parseResults.split(",")[0];
-          } else if (descriptors.containsKey(descriptor)) {
-            parsedDescriptor = true;
-            BufferedReader brD = new BufferedReader(new FileReader(
-                descriptors.get(descriptor)));
-            Set<String> defaultRejects = new HashSet<String>(
-                Arrays.asList(("0.0.0.0/8:*,169.254.0.0/16:*,"
-                + "127.0.0.0/8:*,192.168.0.0/16:*,10.0.0.0/8:*,"
-                + "172.16.0.0/12:*,$IP:*,*:25,*:119,*:135-139,*:445,"
-                + "*:563,*:1214,*:4661-4666,*:6346-6429,*:6699,"
-                + "*:6881-6999").split(",")));
-            /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
-             * in the default exit policy again (and therefore removed
-             * from the default reject lines). */
-            Set<String> optionalRejects = new HashSet<String>(
-                Arrays.asList("*:465,*:587".split(",")));
-            String lineD, address = null;
-            while ((lineD = brD.readLine()) != null) {
-              if (lineD.startsWith("router ")) {
-                address = lineD.split(" ")[2];
-              } else if (lineD.startsWith("bandwidth ")) {
-                descriptorBandwidth = lineD.split(" ")[3];
-              } else if (lineD.startsWith("reject ")) {
-                String rejectPattern = lineD.substring("reject ".
-                    length());
-                if (defaultRejects.contains(rejectPattern)) {
-                  defaultRejects.remove(rejectPattern);
-                } else if (optionalRejects.contains(rejectPattern)) {
-                  optionalRejects.remove(rejectPattern);
-                } else if (rejectPattern.equals(address + ":*")) {
-                  defaultRejects.remove("$IP:*");
-                } else {
-                  break;
-                }
-              } else if (lineD.startsWith("accept ")) {
-                if (defaultRejects.isEmpty() &&
-                    lineD.equals("accept *:*")) {
-                  defaultPolicy = true;
-                }
-                break;
-              }
-            }
-            brD.close();
-            parsedDescriptors.put(descriptor, descriptorBandwidth + ","
-                + (defaultPolicy ? "1" : "0"));
-          } else {
-            System.out.println("We're missing descriptor " + descriptor
-                + ".  Please make sure that all referenced server "
-                + "descriptors are available.  Continuing anyway.");
           }
 
           /* Write everything we know about this relay to disk. */
diff --git a/task-2394/README b/task-2394/README
index 5cb47f6..714853e 100644
--- a/task-2394/README
+++ b/task-2394/README
@@ -1,26 +1,19 @@
 Visualize self-reported vs. measured bandwidth of relays
 ========================================================
 
- - Download consensus and votes from the metrics website, e.g.,
+ - Download a single consensus and all referenced votes and server
+   descriptors from the metrics website, e.g.,
 
    https://metrics.torproject.org/votes?valid-after=2011-07-13-05-00-00
    https://metrics.torproject.org/consensus?valid-after=2011-07-13-05-00-00
+   https://metrics.torproject.org/serverdesc?valid-after=2011-07-13-05-00-00
 
- - Also download the server descriptor archives containing the referenced
-   server descriptors from the metrics website.  If the consensus was
-   published on the 3rd of a month or later, it's sufficient to download a
-   single tarball.  Otherwise, download both tarballs for the current and
-   previous month:
-
-   https://metrics.torproject.org/data/server-descriptors-2011-07.tar.bz2
-
- - Extract the server descriptor tarball(s) into the subdirectory
-   descriptors/.  Also copy the consensus and votes there.  With the URLs
+ - Put all files into a new subdirectory descriptors/.  With the URLs
    stated above, the directory would contain these files:
 
      2011-07-13-05-00-00-consensus
      2011-07-13-05-00-00-votes
-     server-descriptors-2011-07
+     2011-07-13-05-00-00-descriptors
 
  - Download Apache Commons Codec 1.4 or higher and put it in this
    directory.

_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits