[tor-commits] [metrics-tasks/master] Add code for 2680.
commit ee7df6a759f6788579f61403fb9771996ea5c988
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Fri Mar 11 14:51:12 2011 +0100
Add code for 2680.
---
task-2680/.gitignore | 6 +
task-2680/ProcessRelayConsensuses.java | 85 ++++++++
task-2680/ProcessSanitizedBridges.java | 327 ++++++++++++++++++++++++++++++++
task-2680/README | 145 ++++++++++++++
task-2680/verify.R | 27 +++
5 files changed, 590 insertions(+), 0 deletions(-)
diff --git a/task-2680/.gitignore b/task-2680/.gitignore
new file mode 100644
index 0000000..134e86d
--- /dev/null
+++ b/task-2680/.gitignore
@@ -0,0 +1,6 @@
+*.class
+*.csv
+bridge-descriptors/
+commons-codec-1.4.jar
+consensuses/
+
diff --git a/task-2680/ProcessRelayConsensuses.java b/task-2680/ProcessRelayConsensuses.java
new file mode 100644
index 0000000..44d9ce3
--- /dev/null
+++ b/task-2680/ProcessRelayConsensuses.java
@@ -0,0 +1,85 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+import org.apache.commons.codec.digest.*;
+
+public class ProcessRelayConsensuses {
+ public static void main(String[] args) throws IOException {
+
+ /* Validate command-line arguments. */
+ if (args.length != 1 || !new File(args[0]).exists()) {
+ System.out.println("Usage: java ProcessRelayConsensuses <dir>");
+ System.exit(1);
+ }
+
+ /* Find all files that we should parse. Somewhat fragile, but should
+ * work. */
+ System.out.println("Creating list of files we should parse.");
+ SortedMap<String, File> consensuses = new TreeMap<String, File>();
+ Stack<File> files = new Stack<File>();
+ files.add(new File(args[0]));
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ String filename = file.getName();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else if (filename.endsWith("-consensus")) {
+ consensuses.put(filename, file);
+ }
+ }
+ System.out.println("We found " + consensuses.size()
+ + " consensuses.");
+
+ /* Parse consensuses. */
+ if (!consensuses.isEmpty()) {
+ System.out.println("Parsing consensuses.");
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "relays.csv"));
+ bw.write("consensus,fingerprint\n");
+ int parsedConsensuses = 0, totalConsensuses = consensuses.size(),
+ writtenOutputLines = 1;
+ long started = System.currentTimeMillis();
+ for (File file : consensuses.values()) {
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line, validAfter = null;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("valid-after ")) {
+ validAfter = line.substring("valid-after ".length());
+ } else if (line.startsWith("r ")) {
+ if (validAfter == null) {
+ System.out.println("Found an r line before the valid-after "
+ + "line in " + file.getName() + ". Please check. "
+ + "Exiting.");
+ System.exit(1);
+ }
+ /* Hash the base64-decoded relay identity with SHA-1 so that it can
+ * be matched against the hashed fingerprints in the bridge data. */
+ String fingerprint = DigestUtils.shaHex(Base64.decodeBase64(
+ line.split(" ")[2] + "="));
+ bw.write(validAfter + "," + fingerprint + "\n");
+ writtenOutputLines++;
+ }
+ }
+ br.close();
+ parsedConsensuses++;
+ /* Guard against division by zero for small inputs. */
+ if (totalConsensuses >= 10
+ && parsedConsensuses % (totalConsensuses / 10) == 0) {
+ double fractionDone = (double) (parsedConsensuses) /
+ (double) totalConsensuses;
+ double fractionLeft = 1.0D - fractionDone;
+ long now = System.currentTimeMillis();
+ double millisLeft = ((double) (now - started)) * fractionLeft /
+ fractionDone;
+ long secondsLeft = (long) millisLeft / 1000L;
+ System.out.println(" " + (parsedConsensuses / (totalConsensuses
+ / 10)) + "0% done, " + secondsLeft + " seconds left.");
+ }
+ }
+ bw.close();
+ System.out.println("Parsed " + parsedConsensuses + " consensuses "
+ + "and wrote " + writtenOutputLines + " lines to relays.csv.");
+ }
+
+ /* This is it. */
+ System.out.println("Terminating.");
+ }
+}
+
+
diff --git a/task-2680/ProcessSanitizedBridges.java b/task-2680/ProcessSanitizedBridges.java
new file mode 100644
index 0000000..1f0e00e
--- /dev/null
+++ b/task-2680/ProcessSanitizedBridges.java
@@ -0,0 +1,327 @@
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import org.apache.commons.codec.binary.*;
+
+public class ProcessSanitizedBridges {
+ public static void main(String[] args) throws IOException,
+ ParseException {
+
+ /* Validate command-line arguments. */
+ if (args.length != 1 || !new File(args[0]).exists()) {
+ System.out.println("Usage: java ProcessSanitizedBridges <dir>");
+ System.exit(1);
+ }
+
+ /* Find all files that we should parse. Somewhat fragile, but should
+ * work. */
+ System.out.println("Creating list of files we should parse.");
+ SortedMap<String, File> statuses = new TreeMap<String, File>();
+ SortedMap<String, File> serverDescriptors =
+ new TreeMap<String, File>();
+ SortedMap<String, File> extraInfoDescriptors =
+ new TreeMap<String, File>();
+ Stack<File> files = new Stack<File>();
+ files.add(new File(args[0]));
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ String path = file.getAbsolutePath();
+ String filename = file.getName();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else if (path.contains("statuses")) {
+ statuses.put(filename, file);
+ } else if (path.contains("server-descriptors")) {
+ serverDescriptors.put(filename, file);
+ } else if (path.contains("extra-infos")) {
+ extraInfoDescriptors.put(filename, file);
+ }
+ }
+ System.out.println("We found\n " + statuses.size() + " statuses,\n "
+ + serverDescriptors.size() + " server descriptors, and\n "
+ + extraInfoDescriptors.size() + " extra-info descriptors.");
+
+ /* Parse statuses. */
+ if (!statuses.isEmpty()) {
+ System.out.println("Parsing statuses.");
+ List<String> knownFlags = new ArrayList<String>(Arrays.asList(
+ ("Authority,BadExit,BadDirectory,Exit,Fast,Guard,Named,Stable,"
+ + "Running,Valid,V2Dir").split(",")));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "statuses.csv"));
+ bw.write("status,fingerprint,descriptor,published,address,orport,"
+ + "dirport");
+ for (String knownFlag : knownFlags) {
+ bw.write("," + knownFlag.toLowerCase());
+ }
+ bw.write("\n");
+ int parsedStatuses = 0, totalStatuses = statuses.size(),
+ writtenOutputLines = 1;
+ long started = System.currentTimeMillis();
+ for (File file : statuses.values()) {
+ String filename = file.getName();
+ if (filename.length() != ("20110101-000703-"
+ + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D").length()) {
+ System.out.println("Status filename has wrong length: '"
+ + filename + "' Please check. Exiting.");
+ System.exit(1);
+ }
+ String statusDateTime = filename.substring(0, 4) + "-"
+ + filename.substring(4, 6) + "-" + filename.substring(6, 8)
+ + " " + filename.substring(9, 11) + ":"
+ + filename.substring(11, 13) + ":"
+ + filename.substring(13, 15);
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ if (parts.length != 9) {
+ System.out.println("r line doesn't have the correct number "
+ + "of entries: '" + line + "'. Please check. Exiting.");
+ System.exit(1);
+ }
+ /* parts[2] is the base64-encoded fingerprint, parts[3] the
+ * base64-encoded descriptor identifier. */
+ String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
+ parts[2] + "="));
+ String descriptor = Hex.encodeHexString(Base64.decodeBase64(
+ parts[3] + "="));
+ String published = parts[4] + " " + parts[5];
+ String address = parts[6];
+ String orPort = parts[7];
+ String dirPort = parts[8];
+ bw.write(statusDateTime + "," + fingerprint + "," + descriptor
+ + "," + published + "," + address + "," + orPort + ","
+ + dirPort);
+ } else if (line.equals("s") || line.startsWith("s ")) {
+ String flags = line.substring(1);
+ for (String flag : knownFlags) {
+ if (flags.contains(" " + flag)) {
+ bw.write(",TRUE");
+ } else {
+ bw.write(",FALSE");
+ }
+ }
+ bw.write("\n");
+ writtenOutputLines++;
+ }
+ }
+ br.close();
+ parsedStatuses++;
+ /* Guard against division by zero for small inputs. */
+ if (totalStatuses >= 10
+ && parsedStatuses % (totalStatuses / 10) == 0) {
+ double fractionDone = (double) (parsedStatuses) /
+ (double) totalStatuses;
+ double fractionLeft = 1.0D - fractionDone;
+ long now = System.currentTimeMillis();
+ double millisLeft = ((double) (now - started)) * fractionLeft /
+ fractionDone;
+ long secondsLeft = (long) millisLeft / 1000L;
+ System.out.println(" " + (parsedStatuses / (totalStatuses
+ / 10)) + "0% done, " + secondsLeft + " seconds left.");
+ }
+ }
+ bw.close();
+ System.out.println("Parsed " + parsedStatuses + " statuses and "
+ + "wrote " + writtenOutputLines + " lines to statuses.csv.");
+ }
+
+ /* Parse server descriptors and extra-info descriptors. */
+ if (!serverDescriptors.isEmpty()) {
+ System.out.println("Parsing server descriptors and extra-info "
+ + "descriptors.");
+ List<String> knownCountries = new ArrayList<String>(Arrays.asList(
+ ("?? A1 A2 AD AE AF AG AI AL AM AN AO AP AQ AR AS AT AU AW AX "
+ + "AZ BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BV BW BY BZ "
+ + "CA CD CF CG CH CI CK CL CM CN CO CR CS CU CV CY CZ DE DJ DK "
+ + "DM DO DZ EC EE EG ER ES ET EU FI FJ FK FM FO FR GA GB GD GE "
+ + "GF GG GH GI GL GM GN GP GQ GR GT GU GW GY HK HN HR HT HU ID "
+ + "IE IL IM IN IO IQ IR IS IT JE JM JO JP KE KG KH KI KM KN KP "
+ + "KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD ME MF "
+ + "MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC "
+ + "NE NF NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PR "
+ + "PS PT PW PY QA RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK "
+ + "SL SM SN SO SR ST SV SY SZ TC TD TG TH TJ TK TL TM TN TO TR "
+ + "TT TV TW TZ UA UG UM US UY UZ VA VC VE VG VI VN VU WF WS YE "
+ + "YT ZA ZM ZW").toLowerCase().split(" ")));
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "descriptors.csv"));
+ bw.write("descriptor,fingerprint,published,address,orport,dirport,"
+ + "version,platform,uptime,bridgestatsend,bridgestatsseconds");
+ for (String country : knownCountries) {
+ bw.write("," + country);
+ }
+ bw.write(",bridgestatscountries,bridgestatstotal\n");
+ int parsedServerDescriptors = 0, parsedExtraInfoDescriptors = 0,
+ parsedGeoipStats = 0, skippedGeoipStats = 0,
+ parsedBridgeStats = 0,
+ totalServerDescriptors = serverDescriptors.size(),
+ writtenOutputLines = 1;
+ SimpleDateFormat timeFormat = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long started = System.currentTimeMillis();
+ for (File file : serverDescriptors.values()) {
+ String filename = file.getName();
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ String line, fingerprint = null, published = null, address = null,
+ orPort = null, dirPort = null, version = null,
+ platform = null, uptime = null, extraInfoDigest = null,
+ bridgeStatsEnd = null, bridgeStatsSeconds = null;
+ SortedMap<String, String> bridgeStatsIps =
+ new TreeMap<String, String>();
+ long bridgeStatsTotal = 0L;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("opt ")) {
+ line = line.substring(4);
+ }
+ if (line.startsWith("router ")) {
+ String[] parts = line.split(" ");
+ address = parts[2];
+ orPort = parts[3];
+ dirPort = parts[4];
+ } else if (line.startsWith("platform ")) {
+ version = line.split(" ")[2];
+ platform = line.substring(line.indexOf("on ")
+ + "on ".length());
+ if (platform.contains("Windows")) {
+ platform = "Windows";
+ } else if (platform.contains("Linux")) {
+ platform = "Linux";
+ } else if (platform.contains("Darwin")) {
+ platform = "Mac OS X";
+ } else if (platform.contains("BSD")) {
+ platform = "*BSD";
+ } else {
+ platform = "Other";
+ }
+ } else if (line.startsWith("published ")) {
+ String[] parts = line.split(" ");
+ published = parts[1] + " " + parts[2];
+ } else if (line.startsWith("fingerprint ")) {
+ fingerprint = line.substring("fingerprint".length()).
+ replaceAll(" ", "").toLowerCase();
+ } else if (line.startsWith("uptime ")) {
+ uptime = line.split(" ")[1];
+ } else if (line.startsWith("extra-info-digest ")) {
+ extraInfoDigest = line.substring("extra-info-digest ".
+ length()).toLowerCase();
+ if (extraInfoDescriptors.containsKey(extraInfoDigest)) {
+ parsedExtraInfoDescriptors++;
+ BufferedReader br2 = new BufferedReader(new FileReader(
+ extraInfoDescriptors.get(extraInfoDigest)));
+ String geoipStartTime = null, bridgeStatsEndLine = null;
+ while ((line = br2.readLine()) != null) {
+ if (line.startsWith("geoip-start-time ")) {
+ geoipStartTime = line.substring("geoip-start-time ".
+ length());
+ } else if (line.startsWith("geoip-client-origins ") &&
+ line.split(" ").length > 1 && published != null &&
+ geoipStartTime != null) {
+ if (version.startsWith("0.2.2.")) {
+ skippedGeoipStats++;
+ } else {
+ parsedGeoipStats++;
+ bridgeStatsEnd = published;
+ bridgeStatsSeconds = "" +
+ + (timeFormat.parse(published).getTime()
+ - timeFormat.parse(geoipStartTime).getTime())
+ / 1000L;
+ for (String pair : line.split(" ")[1].split(",")) {
+ String country = pair.substring(0, 2);
+ String ips = pair.substring(3);
+ bridgeStatsIps.put(country, ips);
+ bridgeStatsTotal += Long.parseLong(ips);
+ }
+ }
+ } else if (line.startsWith("bridge-stats-end ")) {
+ bridgeStatsEndLine = line;
+ } else if (line.startsWith("bridge-ips ") &&
+ line.length() > "bridge-ips ".length() &&
+ bridgeStatsEndLine != null) {
+ parsedBridgeStats++;
+ String[] parts = bridgeStatsEndLine.split(" ");
+ bridgeStatsEnd = parts[1] + " " + parts[2];
+ bridgeStatsSeconds = parts[3].substring(1);
+ for (String pair : line.split(" ")[1].split(",")) {
+ String country = pair.substring(0, 2);
+ String ips = pair.substring(3);
+ bridgeStatsIps.put(country, ips);
+ bridgeStatsTotal += Long.parseLong(ips);
+ }
+ }
+ }
+ br2.close();
+ }
+ }
+ }
+ br.close();
+ if (fingerprint == null || published == null || address == null ||
+ orPort == null || dirPort == null || version == null ||
+ platform == null || uptime == null) {
+ System.out.println("Server descriptor " + filename + " is "
+ + "missing critical information. Please check. Exiting.");
+ System.exit(1);
+ }
+ bw.write(filename + "," + fingerprint + "," + published + ","
+ + address + "," + orPort + "," + dirPort + "," + version + ","
+ + platform + "," + uptime);
+ if (bridgeStatsEnd != null) {
+ bw.write("," + bridgeStatsEnd + "," + bridgeStatsSeconds);
+ int bridgeStatsCountries = bridgeStatsIps.size();
+ for (String country : knownCountries) {
+ if (bridgeStatsIps.containsKey(country)) {
+ bw.write("," + bridgeStatsIps.remove(country));
+ } else {
+ bw.write(",0");
+ }
+ }
+ if (!bridgeStatsIps.isEmpty()) {
+ StringBuilder message = new StringBuilder();
+ for (String country : bridgeStatsIps.keySet()) {
+ message.append(", " + country);
+ }
+ System.out.println("Unknown " + (bridgeStatsIps.size() == 1 ?
+ "country" : "countries") + " " + message.toString().
+ substring(2) + " in extra-info descriptor "
+ + extraInfoDigest + ". Please check. Exiting.");
+ System.exit(1);
+ }
+ bw.write("," + bridgeStatsCountries + "," + bridgeStatsTotal
+ + "\n");
+ } else {
+ bw.write(",NA,NA");
+ for (String country : knownCountries) {
+ bw.write(",NA");
+ }
+ bw.write(",NA,NA\n");
+ }
+ writtenOutputLines++;
+ parsedServerDescriptors++;
+ /* Guard against division by zero for small inputs. */
+ if (totalServerDescriptors >= 100 && parsedServerDescriptors
+ % (totalServerDescriptors / 100) == 0) {
+ double fractionDone = (double) (parsedServerDescriptors) /
+ (double) totalServerDescriptors;
+ double fractionLeft = 1.0D - fractionDone;
+ long now = System.currentTimeMillis();
+ double millisLeft = ((double) (now - started)) * fractionLeft /
+ fractionDone;
+ long secondsLeft = (long) millisLeft / 1000L;
+ System.out.println(" " + (parsedServerDescriptors /
+ (totalServerDescriptors / 100)) + "% done, " + secondsLeft
+ + " seconds left.");
+ }
+ }
+ bw.close();
+ System.out.println("Parsed " + parsedServerDescriptors + " server "
+ + "descriptors and " + parsedExtraInfoDescriptors
+ + " extra-info descriptors.\nParsed " + parsedGeoipStats
+ + " geoip-stats and " + parsedBridgeStats + " bridge-stats.\n"
+ + "Skipped " + skippedGeoipStats + " broken geoip-stats of "
+ + "0.2.2.x bridges.\nWrote " + writtenOutputLines + " to "
+ + "descriptors.csv.");
+ }
+
+ /* This is it. */
+ System.out.println("Terminating.");
+ }
+}
+
diff --git a/task-2680/README b/task-2680/README
new file mode 100644
index 0000000..a00856f
--- /dev/null
+++ b/task-2680/README
@@ -0,0 +1,145 @@
+This ticket contains Java and R code to
+
+ a) process bridge and relay data to convert them to a format that is more
+ useful for researchers and
+ b) verify that the output data files are valid.
+
+This README has a separate section for each Java or R code snippet.
+
+The Java applications produce three output files (descriptors.csv,
+statuses.csv, and relays.csv) containing bridge descriptors, bridge
+status lines, and hashed relay identities, respectively. The data
+formats are described below.
+
+--------------------------------------------------------------------------
+
+ProcessSanitizedBridges.java
+
+ - Download sanitized bridge descriptors from the metrics website, e.g.,
+ https://metrics.torproject.org/data/bridge-descriptors-2011-01.tar.bz2,
+ and extract them in a local directory, e.g., bridge-descriptors/.
+
+ - Download Apache Commons Codec 1.4 or higher and put it in this
+ directory.
+
+ - Compile the Java class, e.g.,
+ $ javac -cp commons-codec-1.4.jar ProcessSanitizedBridges.java
+
+ - Run the Java class, e.g.,
+ $ java -cp .:commons-codec-1.4.jar ProcessSanitizedBridges
+ bridge-descriptors/
+
+ - Once the Java application is done, you'll find the two files
+ statuses.csv and descriptors.csv in this directory.
+
+--------------------------------------------------------------------------
+
+ProcessRelayConsensuses.java
+
+ - Download v3 relay consensuses from the metrics website, e.g.,
+ https://metrics.torproject.org/data/consensuses-2011-01.tar.bz2, and
+ extract them in a local directory, e.g., consensuses/.
+
+ - Download Apache Commons Codec 1.4 or higher and put it in this
+ directory, unless you have already done so for
+ ProcessSanitizedBridges.java.
+
+ - Compile the Java class, e.g.,
+ $ javac -cp commons-codec-1.4.jar ProcessRelayConsensuses.java
+
+ - Run the Java class, e.g.,
+ $ java -cp .:commons-codec-1.4.jar ProcessRelayConsensuses consensuses/
+
+ - Once the Java application is done, you'll find a file relays.csv in
+ this directory.
+
+--------------------------------------------------------------------------
+
+verify.R
+
+ - Run the R verification script like this:
+ $ R --slave -f verify.R
+
+--------------------------------------------------------------------------
+
+descriptors.csv
+
+The descriptors.csv file contains one line for each bridge descriptor that
+a bridge has published. Each line combines fields from the bridge's server
+descriptor with fields from the extra-info descriptor that was published
+at the same time.
+
+The columns in descriptors.csv are:
+
+ - descriptor: Hex-formatted descriptor identifier
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - published: ISO-formatted descriptor publication time
+ - address: Sanitized IPv4 address in dotted notation
+ - orport: OR port
+ - dirport: Dir port
+ - version: Tor version
+ - platform: Operating system family (Windows, Linux, etc.)
+ - uptime: Uptime in seconds
+ - bridgestatsend: ISO-formatted time when stats interval ended
+ - bridgestatsseconds: Stats interval length in seconds
+ - ??: Unique client IP addresses that could not be resolved to a country
+ - a1: Unique client IP addresses from anonymous proxies
+ - a2: Unique client IP addresses from satellite providers
+ - ad: Unique client IP addresses from Andorra
+ - ae: Unique client IP addresses from the United Arab Emirates
+ - [...] See ISO 3166-1 alpha-2 country codes
+ - zw: Unique client IP addresses from Zimbabwe
+ - bridgestatscountries: Number of countries with non-zero unique IPs
+ - bridgestatstotal: Total number of unique IPs
+
+There are two sources for the bridgestats* and country-code columns,
+depending on the bridge's Tor version. Bridges running Tor version
+0.2.1.x or earlier use dynamic stats intervals ranging from a few hours
+to a few days. Early 0.2.2.x versions published faulty geoip stats;
+those stats are excluded from descriptors.csv, so the affected columns
+contain NA for such bridges. Bridges running 0.2.2.x or higher (except
+the faulty 0.2.2.x versions) collect stats in 24-hour intervals.
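+
+As a quick cross-check of this format, the per-country columns should add
+up to bridgestatstotal for every descriptor that contains bridge stats.
+A minimal R sketch of that check (not part of this ticket's code; column
+positions are taken from the description above):
+
+  d <- read.csv("descriptors.csv", stringsAsFactors = FALSE)
+  withStats <- d[!is.na(d$bridgestatsend), ]
+  firstCountry <- which(names(d) == "bridgestatsseconds") + 1
+  lastCountry <- which(names(d) == "bridgestatscountries") - 1
+  countrySums <- rowSums(withStats[, firstCountry:lastCountry])
+  cat(sum(countrySums == withStats$bridgestatstotal), "of",
+    nrow(withStats), "descriptors have consistent totals.\n")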
+
+--------------------------------------------------------------------------
+
+statuses.csv
+
+The statuses.csv file contains one line for every bridge that is
+referenced in a bridge network status. Note that if a bridge is running
+for, say, 12 hours, it will be contained in 24 half-hourly published
+statuses in that time and will be listed 24 times in statuses.csv.
+
+The columns in statuses.csv are:
+
+ - status: ISO-formatted status publication time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+ - descriptor: Hex-formatted descriptor identifier
+ - published: ISO-formatted descriptor publication time
+ - address: Sanitized IPv4 address in dotted notation
+ - orport: OR port
+ - dirport: Dir port
+ - authority: TRUE if bridge has the Authority flag, FALSE otherwise
+ - badexit: TRUE if bridge has the BadExit flag, FALSE otherwise
+ - baddirectory: TRUE if bridge has the BadDirectory flag, FALSE otherwise
+ - exit: TRUE if bridge has the Exit flag, FALSE otherwise
+ - fast: TRUE if bridge has the Fast flag, FALSE otherwise
+ - guard: TRUE if bridge has the Guard flag, FALSE otherwise
+ - named: TRUE if bridge has the Named flag, FALSE otherwise
+ - stable: TRUE if bridge has the Stable flag, FALSE otherwise
+ - running: TRUE if bridge has the Running flag, FALSE otherwise
+ - valid: TRUE if bridge has the Valid flag, FALSE otherwise
+ - v2dir: TRUE if bridge has the V2Dir flag, FALSE otherwise
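+
+As noted above, a bridge that is running continuously is referenced in one
+status per half hour, so the number of status entries per fingerprint is a
+rough measure of how long a bridge has been running. A minimal R sketch
+(not part of this ticket's code) that summarizes these counts:
+
+  s <- read.csv("statuses.csv", stringsAsFactors = FALSE)
+  statusesPerBridge <- table(s$fingerprint)
+  print(summary(as.numeric(statusesPerBridge)))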
+
+--------------------------------------------------------------------------
+
+relays.csv
+
+The relays.csv file contains SHA-1 hashes of identity fingerprints of
+normal relays. If a bridge uses the same identity key that it previously
+used as a relay, it might observe more users than it would observe as a
+pure bridge. Therefore, bridges that have been running as relays before
+should be excluded from bridge statistics; a sketch of such a filter
+follows the column list below.
+
+The columns in relays.csv are:
+
+ - consensus: ISO-formatted consensus publication time
+ - fingerprint: Hex-formatted SHA-1 hash of identity fingerprint
+
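+Since both descriptors.csv and relays.csv contain hex-formatted SHA-1
+hashes of identity fingerprints, the two files can be joined directly.
+A minimal R sketch (not part of this ticket's code) of the filter
+mentioned above, dropping all descriptors whose fingerprint also shows up
+in relays.csv:
+
+  d <- read.csv("descriptors.csv", stringsAsFactors = FALSE)
+  r <- read.csv("relays.csv", stringsAsFactors = FALSE)
+  alsoRelay <- unique(r$fingerprint)
+  bridgesOnly <- d[!(d$fingerprint %in% alsoRelay), ]
+  cat("Kept", nrow(bridgesOnly), "of", nrow(d), "descriptors after",
+    "excluding bridges that were also seen as relays.\n")
+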
diff --git a/task-2680/verify.R b/task-2680/verify.R
new file mode 100644
index 0000000..63ef233
--- /dev/null
+++ b/task-2680/verify.R
@@ -0,0 +1,27 @@
+# Usage: R --slave -f verify.R
+
+if (file.exists("descriptors.csv")) {
+ cat("Verifying descriptors.csv. This may take a while.\n")
+ d <- read.csv("descriptors.csv", stringsAsFactors = FALSE)
+ cat(" ", length(na.omit(d$bridgestatsend)), "of", length(d$descriptor),
+ "descriptors contain bridge stats.\n")
+} else {
+ cat("descriptors.csv does not exist\n")
+}
+
+if (file.exists("statuses.csv")) {
+ cat("Verifying statuses.csv. This may take a while.\n")
+ s <- read.csv("statuses.csv", stringsAsFactors = FALSE)
+ cat(" ", length(s[s$running == TRUE, "running"]), "of",
+ length(s$running), "bridges contained in the statuses have the",
+ "Running flag.\n")
+} else {
+ cat("statuses.csv does not exist\n")
+}
+
+if (file.exists("relays.csv")) {
+ cat("Verifying relays.csv. This may take a while.\n")
+ r <- read.csv("relays.csv", stringsAsFactors = FALSE)
+ summary(as.POSIXct(r$consensus))
+}
+