[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Add simulation code and LaTeX sources of #2911 draft.
commit cff86510fd86e73a2f66f21ac8b47068d187d972
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Mon May 30 11:16:10 2011 +0200
Add simulation code and LaTeX sources of #2911 draft.
---
task-2911/.gitignore | 9 +
task-2911/README | 105 ++++++
.../mtbf-sim/SimulateMeanTimeBetweenFailure.java | 351 ++++++++++++++++++++
task-2911/mtbf-sim/mtbf-sim.R | 73 ++++
task-2911/report.tex | 295 ++++++++++++++++
.../wfu-sim/SimulateWeightedFractionalUptime.java | 314 +++++++++++++++++
task-2911/wfu-sim/wfu-sim.R | 57 ++++
7 files changed, 1204 insertions(+), 0 deletions(-)
diff --git a/task-2911/.gitignore b/task-2911/.gitignore
new file mode 100644
index 0000000..d2480c1
--- /dev/null
+++ b/task-2911/.gitignore
@@ -0,0 +1,9 @@
+*.class
+mtbf-sim/tunf/
+wfu-sim/fwfu/
+wfu-sim/consensuses/
+*.csv
+*.aux
+*.log
+*.pdf
+
diff --git a/task-2911/README b/task-2911/README
new file mode 100644
index 0000000..bcefa2d
--- /dev/null
+++ b/task-2911/README
@@ -0,0 +1,105 @@
+Tech report: An Analysis of Tor Relay Stability
+===============================================
+
+Simulation of MTBF requirements
+-------------------------------
+
+Change to the MTBF simulation directory:
+
+ $ cd mtbf-sim/
+
+Export status entries and server descriptor parts from the metrics
+database, once in reverse and once in forward order. Note that each file
+will be 2.2G large for roughly 2.5 years of data. Plan for a buffer of at
+least 4 months before and after the interval to investigate:
+
+ tordir=> \o running-relays-reverse.csv
+ tordir=> SELECT statusentry.validafter,
+ statusentry.fingerprint,
+ CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE
+ statusentry.validafter - descriptor.published +
+ descriptor.uptime * '1 second'::INTERVAL <
+ '01:00:00'::INTERVAL END AS restarted
+ FROM statusentry
+ LEFT JOIN descriptor
+ ON statusentry.descriptor = descriptor.descriptor
+ WHERE statusentry.isrunning
+ AND statusentry.validafter >= '2009-01-01 00:00:00'
+ ORDER BY statusentry.validafter DESC, statusentry.fingerprint;
+ tordir=> \o
+ tordir=> \o running-relays-forward.csv
+ tordir=> SELECT statusentry.validafter,
+ statusentry.fingerprint,
+ CASE WHEN descriptor.uptime IS NULL THEN FALSE ELSE
+ statusentry.validafter - descriptor.published +
+ descriptor.uptime * '1 second'::INTERVAL <
+ '01:00:00'::INTERVAL END AS restarted
+ FROM statusentry
+ LEFT JOIN descriptor
+ ON statusentry.descriptor = descriptor.descriptor
+ WHERE statusentry.isrunning
+ AND statusentry.validafter >= '2009-01-01 00:00:00'
+ ORDER BY statusentry.validafter, statusentry.fingerprint;
+ tordir=> \o
+
+Run the simulation consisting of a reverse and a forward run. The results
+of the reverse run will be stored to the tunf/ directory and will be
+re-used in subsequent simulations. Delete the tunf/ directory to repeat
+the reverse run, too.
+
+ $ javac SimulateMeanTimeBetweenFailure.java
+ $ java SimulateMeanTimeBetweenFailure
+
+Plot the results:
+
+ $ R --slave -f mtbf-sim.R
+
+Once you're satisfied with the result, copy the graph to the parent
+directory to include it in the report:
+
+ $ cp mtbf-sim.pdf ../
+
+
+Simulation of WFU requirements
+------------------------------
+
+Change to the WFU simulation directory:
+
+ $ cd wfu-sim/
+
+Create a consensuses/ directory and put the consensus files of the
+interval to investigate plus 4+ months before and 4+ months after in it:
+
+ $ mkdir consensuses/
+ $ ln -s $extracted/consensuses-20* consensuses/
+
+Run the simulation that first parses consensuses from last to first and
+then from first to last. The results from the reverse direction will be
+stored in the fwfu/ directory and re-used in subsequent simulations.
+Delete the fwfu/ directory to re-run both simulation parts.
+
+ $ javac SimulateWeightedFractionalUptime.java
+ $ java SimulateWeightedFractionalUptime
+
+Plot the results:
+
+ $ R --slave -f wfu-sim.R
+
+Copy the graph to the parent directory to include it in the report:
+
+ $ cp wfu-sim.pdf ../
+
+
+Compiling the report
+--------------------
+
+Copy the generated graphs to the base directory, unless you have done so
+before:
+
+ $ cp mtbf-sim/mtbf-sim.pdf .
+ $ cp wfu-sim/wfu-sim.pdf .
+
+Compile the report:
+
+ $ pdflatex report.tex
+
diff --git a/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java
new file mode 100644
index 0000000..cd73f82
--- /dev/null
+++ b/task-2911/mtbf-sim/SimulateMeanTimeBetweenFailure.java
@@ -0,0 +1,351 @@
+/**
+ * Simulate variation of mean time between failure on Stable relays. The
+ * simulation is based on the previously generated SQL results containing
+ * network status entries and parts of server descriptors. In a first
+ * step, parse the SQL results that are in descending order to calculate
+ * time until next failure for all relays and write them to disk as one
+ * file per network status in tunf/$filename. (Skip this step if there is
+ * already a tunf/ directory.) In a second step, parse the network
+ * statuses again, but this time from first to last, calculate mean times
+ * between failure for all relays, form relay subsets based on minimal
+ * MTBF, look up what the time until next failure would be for a subset,
+ * and write results to mtbf-sim.csv to disk. */
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class SimulateMeanTimeBetweenFailure {
+ public static void main(String[] args) throws Exception {
+
+ /* Measure how long this execution takes. */
+ long started = System.currentTimeMillis();
+
+ /* Decide whether we need to do the reverse run, or if we can use
+ * previous results. */
+ if (!new File("tunf").exists()) {
+
+ /* For each relay as identified by its hex encoded fingerprint,
+ * track time until next failure in seconds in a long. */
+ SortedMap<String, Long> knownRelays = new TreeMap<String, Long>();
+
+ /* Parse previously exported network status entries in reverse
+ * order. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat isoFormatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Map<String, String> runningRelays = new HashMap<String, String>();
+ BufferedReader br = new BufferedReader(new FileReader(
+ "running-relays-reverse.csv"));
+ String line, lastValidAfter = null, lastButOneValidAfter = null;
+ while ((line = br.readLine()) != null) {
+ if (!line.startsWith("20")) {
+ continue;
+ }
+ String[] parts = line.split(",");
+ String validAfter = parts[0];
+ if (lastValidAfter != null &&
+ !lastValidAfter.equals(validAfter)) {
+
+ /* We just parsed all lines of a consensus. Let's write times
+ * until next failure to disk for all running relays and update
+ * our internal history. */
+ if (lastButOneValidAfter == null) {
+ lastButOneValidAfter = lastValidAfter;
+ }
+ long lastValidAfterMillis = isoFormatter.parse(lastValidAfter).
+ getTime();
+ File tunfFile = new File("tunf",
+ formatter.format(lastValidAfterMillis));
+ tunfFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ tunfFile));
+ long secondsSinceLastValidAfter =
+ (isoFormatter.parse(lastButOneValidAfter).getTime()
+ - lastValidAfterMillis) / 1000L;
+
+ /* Iterate over our history first and see if these relays have
+ * been running in the considered consensus. Remember changes
+ * to our history and modify it below to avoid concurrent
+ * modification errors. */
+ Set<String> removeFromHistory = new HashSet<String>();
+ Map<String, Long> addToHistory = new HashMap<String, Long>();
+ for (Map.Entry<String, Long> e : knownRelays.entrySet()) {
+ String fingerprint = e.getKey();
+ if (runningRelays.containsKey(fingerprint)) {
+
+ /* This relay has been running, so write it to the output
+ * file and update our history. */
+ long hoursUntilFailure = e.getValue();
+ bw.write(fingerprint + "," + (secondsSinceLastValidAfter
+ + hoursUntilFailure) + "\n");
+ boolean restarted = runningRelays.get(fingerprint).
+ split(",")[2].equals("t");
+ if (restarted) {
+ removeFromHistory.add(fingerprint);
+ } else {
+ addToHistory.put(fingerprint, secondsSinceLastValidAfter
+ + hoursUntilFailure);
+ }
+ runningRelays.remove(fingerprint);
+ } else {
+
+ /* This relay has not been running, so remove it from our
+ * history. */
+ removeFromHistory.add(fingerprint);
+ }
+ }
+
+ /* Update our history for real now. We couldn't do this above,
+ * or we'd have modified the set we've been iterating over. */
+ for (String f : removeFromHistory) {
+ knownRelays.remove(f);
+ }
+ for (Map.Entry<String, Long> e : addToHistory.entrySet()) {
+ knownRelays.put(e.getKey(), e.getValue());
+ }
+
+ /* Iterate over the relays that we found in the consensus, but
+ * that we didn't have in our history. */
+ for (Map.Entry<String, String> e : runningRelays.entrySet()) {
+ String fingerprint = e.getKey();
+ bw.write(fingerprint + ",0\n");
+ boolean restarted = e.getValue().split(",")[2].equals("t");
+ if (!restarted) {
+ knownRelays.put(fingerprint, 0L);
+ }
+ }
+ bw.close();
+
+ /* Prepare for next consensus. */
+ runningRelays = new HashMap<String, String>();
+ lastButOneValidAfter = lastValidAfter;
+ }
+
+ /* Add the running relay lines to a map that we parse once we have
+ * all lines of a consensus. */
+ String fingerprint = parts[1];
+ runningRelays.put(fingerprint, line);
+ lastValidAfter = validAfter;
+ }
+ }
+
+ /* Run the simulation for the following WMTBF percentiles: */
+ List<Long> requiredWMTBFs = new ArrayList<Long>();
+ for (long l : new long[] { 20, 30, 40, 50, 60, 70, 80 }) {
+ requiredWMTBFs.add(l);
+ }
+ Collections.sort(requiredWMTBFs);
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ "mtbf-sim.csv"));
+ bw.write("time");
+ for (long requiredWMTBF : requiredWMTBFs) {
+ bw.write(",mtunf" + requiredWMTBF + ",perc75tunf" + requiredWMTBF
+ + ",perc80tunf" + requiredWMTBF + ",perc85tunf" + requiredWMTBF
+ + ",perc90tunf" + requiredWMTBF + ",perc95tunf" + requiredWMTBF
+ + ",wmtbf" + requiredWMTBF);
+ }
+ bw.write("\n");
+
+ /* For each relay as identified by its hex encoded fingerprint,
+ * track weighted run length, total run weights, and current run
+ * length in a double[3]. */
+ SortedMap<String, double[]> knownRelays =
+ new TreeMap<String, double[]>();
+
+ /* Parse previously exported network status entries again, but this
+ * time in forward order. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat isoFormatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ Map<String, String> runningRelays = new HashMap<String, String>(),
+ lastRunningRelays = new HashMap<String, String>();
+ BufferedReader br = new BufferedReader(new FileReader(
+ "running-relays-forward.csv"));
+ String line, lastValidAfter = null, firstValidAfter = null;
+ long nextWeightingInterval = -1L;
+ while ((line = br.readLine()) != null) {
+ if (!line.startsWith("20")) {
+ continue;
+ }
+ String[] parts = line.split(",");
+ String validAfter = parts[0];
+ if (firstValidAfter == null) {
+ firstValidAfter = validAfter;
+ }
+ if (lastValidAfter != null &&
+ !lastValidAfter.equals(validAfter)) {
+
+ /* We just parsed all lines of a consensus. First, see if 12
+ * hours have passed since we last discounted weighted run lengths
+ * and total run weights. If so, discount both variables for all
+ * known relays by factor 0.95 and remove those relays with a total
+ * run weight below 1/10000.  (NOTE(review): relaysToRemove below is
+ * never populated, so the removal step is currently a no-op.) */
+ long lastValidAfterMillis = isoFormatter.parse(lastValidAfter).
+ getTime();
+ long validAfterMillis = isoFormatter.parse(validAfter).getTime();
+ long weightingInterval = validAfterMillis
+ / (12L * 60L * 60L * 1000L);
+ if (nextWeightingInterval < 0L) {
+ nextWeightingInterval = weightingInterval;
+ }
+ while (weightingInterval > nextWeightingInterval) {
+ Set<String> relaysToRemove = new HashSet<String>();
+ for (Map.Entry<String, double[]> e : knownRelays.entrySet()) {
+ double[] w = e.getValue();
+ w[0] *= 0.95;
+ w[1] *= 0.95;
+ }
+ for (String fingerprint : relaysToRemove) {
+ knownRelays.remove(fingerprint);
+ }
+ nextWeightingInterval += 1L;
+ }
+
+ /* Update history for running relays. Start by iterating over all
+ * relays in the history, see if they're running now and whether
+ * they have been restarted. Distinguish four cases for relays in
+ * the history: 1) still running, 2) still running but restarted,
+ * 3) started in this consensus, 4) stopped in this consensus. */
+ double secondsSinceLastValidAfter =
+ (double) ((validAfterMillis - lastValidAfterMillis) / 1000L);
+ Set<String> updatedRelays = new HashSet<String>();
+ for (Map.Entry<String, double[]> e : knownRelays.entrySet()) {
+ String fingerprint = e.getKey();
+ double[] w = e.getValue();
+ if (runningRelays.containsKey(fingerprint)) {
+ if (w[2] > 0.1) {
+ if (!runningRelays.get(fingerprint).split(",")[2].
+ equals("t")) {
+
+ /* Case 1) still running: */
+ w[2] += secondsSinceLastValidAfter;
+ } else {
+
+ /* Case 2) still running but restarted: */
+ w[0] += w[2];
+ w[1] += 1.0;
+ w[2] = secondsSinceLastValidAfter;
+ }
+ } else {
+
+ /* Case 3) started in this consensus: */
+ w[2] = secondsSinceLastValidAfter;
+ }
+
+ /* Mark relay as already processed, or we'd add it to the
+ * history as a new relay below. */
+ updatedRelays.add(fingerprint);
+ } else if (w[2] > 0.1) {
+
+ /* Case 4) stopped in this consensus: */
+ w[0] += w[2];
+ w[1] += 1.0;
+ w[2] = 0.0;
+ }
+ }
+
+ /* Iterate over the set of currently running relays and add those
+ * that we haven't processed above to our history. */
+ for (String fingerprint : runningRelays.keySet()) {
+ if (!updatedRelays.contains(fingerprint)) {
+ updatedRelays.add(fingerprint);
+ knownRelays.put(fingerprint, new double[] { 0.0, 0.0,
+ secondsSinceLastValidAfter });
+ }
+ }
+
+ /* Calculate WMTBFs for all running relays and put them in a list
+ * that we can sort by WMTBF in descending order. */
+ List<String> wmtbfs = new ArrayList<String>();
+ for (String fingerprint : runningRelays.keySet()) {
+ double[] w = knownRelays.get(fingerprint);
+ double totalRunLength = w[0] + w[2];
+ double totalWeights = w[1] + (w[2] > 0.1 ? 1.0 : 0.0);
+ long wmtbf = totalWeights < 0.0001 ? 0
+ : (long) (totalRunLength / totalWeights);
+ wmtbfs.add(String.format("%012d %s", wmtbf, fingerprint));
+ }
+ Collections.sort(wmtbfs, Collections.reverseOrder());
+
+ /* Read previously calculated TUNFs from disk. */
+ Map<String, Long> tunfs = new HashMap<String, Long>();
+ File tunfFile = new File("tunf",
+ formatter.format(lastValidAfterMillis));
+ if (!tunfFile.exists()) {
+ if (!lastValidAfter.equals(firstValidAfter)) {
+ System.out.println("Could not find file " + tunfFile
+ + ". Skipping simulation!");
+ }
+ } else {
+ BufferedReader tunfBr = new BufferedReader(new FileReader(
+ tunfFile));
+ String tunfLine;
+ while ((tunfLine = tunfBr.readLine()) != null) {
+ String[] tunfParts = tunfLine.split(",");
+ tunfs.put(tunfParts[0], Long.parseLong(tunfParts[1]));
+ }
+ tunfBr.close();
+
+ /* Run the simulation for the relays in the current consensus
+ * for various required WFUs. */
+ bw.write(isoFormatter.format(lastValidAfterMillis));
+ long totalRelays = (long) wmtbfs.size(), selectedRelays = 0L,
+ totalTunf = 0L, minimalWmtbf = 0L;
+ int simulationIndex = 0;
+ List<Long> tunfList = new ArrayList<Long>();
+ for (String relay : wmtbfs) {
+ while (simulationIndex < requiredWMTBFs.size() &&
+ selectedRelays * 100L > totalRelays
+ * requiredWMTBFs.get(simulationIndex)) {
+ if (selectedRelays == 0L) {
+ bw.write(",NA,NA,NA,NA,NA,NA");
+ } else {
+ Collections.sort(tunfList, Collections.reverseOrder());
+ long perc75 = tunfList.get((75 * tunfList.size()) / 100);
+ long perc80 = tunfList.get((80 * tunfList.size()) / 100);
+ long perc85 = tunfList.get((85 * tunfList.size()) / 100);
+ long perc90 = tunfList.get((90 * tunfList.size()) / 100);
+ long perc95 = tunfList.get((95 * tunfList.size()) / 100);
+ bw.write("," + (totalTunf / selectedRelays) + "," + perc75
+ + "," + perc80 + "," + perc85 + "," + perc90 + ","
+ + perc95);
+ }
+ bw.write("," + minimalWmtbf);
+ simulationIndex++;
+ }
+ String[] wmtbfParts = relay.split(" ");
+ minimalWmtbf = Long.parseLong(wmtbfParts[0]);
+ String fingerprint = wmtbfParts[1];
+ long tunf = tunfs.get(fingerprint);
+ totalTunf += tunf;
+ tunfList.add(tunf);
+ selectedRelays += 1L;
+ }
+ bw.write("\n");
+ }
+
+ /* We're done with this consensus. Prepare for the next. */
+ lastRunningRelays = runningRelays;
+ runningRelays = new HashMap<String, String>();
+ }
+
+ /* Add the running relay lines to a map that we parse once we have
+ * all lines of a consensus. */
+ String fingerprint = parts[1];
+ runningRelays.put(fingerprint, line);
+ lastValidAfter = validAfter;
+ }
+ bw.close();
+
+ /* Print how long this execution took and exit. */
+ System.out.println("Execution took " + ((System.currentTimeMillis()
+ - started) / (60L * 1000L)) + " minutes.");
+ }
+}
+
diff --git a/task-2911/mtbf-sim/mtbf-sim.R b/task-2911/mtbf-sim/mtbf-sim.R
new file mode 100644
index 0000000..a630406
--- /dev/null
+++ b/task-2911/mtbf-sim/mtbf-sim.R
@@ -0,0 +1,73 @@
+library(ggplot2)
+
+data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE)
+d <- data[data$time >= '2010' & data$time < '2011', ]
+d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+d <- rbind(
+ data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %"),
+ data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"),
+ data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"),
+ data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"),
+ data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %"))
+ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60))) +
+facet_wrap(~ sim) +
+geom_path() +
+scale_x_continuous("\nRequired WMTBF in days",
+ breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7),
+ minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) +
+scale_y_continuous(paste("Time in hours until 10 % of relays\nor ",
+ "27.1 % of streams have failed\n", sep = ""),
+ breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24))
+ggsave(filename = "mtbf-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because this graph is meaningless in b/w. The graph
+## above contains the same data, but can be printed in b/w.
+#data <- read.csv("mtbf-sim.csv", stringsAsFactors = FALSE)
+#d <- data[data$time >= '2010' & data$time < '2011', ]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- rbind(
+# data.frame(x = d$wmtbf70, y = d$perc90tunf70, sim = "70 %"),
+# data.frame(x = d$wmtbf60, y = d$perc90tunf60, sim = "60 %"),
+# data.frame(x = d$wmtbf50, y = d$perc90tunf50, sim = "50 % (default)"),
+# data.frame(x = d$wmtbf40, y = d$perc90tunf40, sim = "40 %"),
+# data.frame(x = d$wmtbf30, y = d$perc90tunf30, sim = "30 %"))
+#ggplot(d, aes(x = x / (24 * 60 * 60), y = y / (60 * 60),
+# colour = sim)) +
+#geom_path() +
+#scale_x_continuous("\nRequired WMTBF in days",
+# breaks = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 7),
+# minor = seq(0, max(d$x, na.rm = TRUE) / (24 * 60 * 60), 1)) +
+#scale_y_continuous(paste("Time until \n10 % of relays or \n",
+# "27.1 % of streams \nhave failed \nin hours ", sep = ""),
+# breaks = seq(0, max(d$y, na.rm = TRUE) / (60 * 60), 24)) +
+#scale_colour_hue("Fraction of relays\nby highest WMTBF",
+# breaks = c("30 %", "40 %", "50 % (default)", "60 %", "70 %")) +
+#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold",
+# hjust = 0.5),
+# axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5,
+# hjust = 1))
+#ggsave(filename = "mtbf-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because focusing on the development over time is the
+## wrong thing here.
+#simulations <- paste("mtunf", c(20, 30, 40, 50, 60, 70, 80),
+# sep = "")
+#d <- data[data$time >= '2010' & data$time < '2011',
+# c("time", simulations)]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- melt(d, id.vars = 1)
+#ggplot(d, aes(x = date, y = value / (24 * 60 * 60), colour = variable)) +
+#geom_line() +
+#scale_x_date("", major = "3 months", minor = "1 month",
+# format = "%b %Y") +
+#scale_y_continuous(paste("Mean time \nuntil next \nfailure \n",
+# "in days \n", sep = ""),
+# limits = c(0, max(d$value, na.rm = TRUE) / (24 * 60 * 60))) +
+#scale_colour_hue(paste("Percentile\nhighest\nweighted mean\n",
+# "time between\nfailures", sep = ""), breaks = simulations,
+# labels = paste(substr(simulations, 6, 9),
+# ifelse(simulations == "mtunf50", "(default)", ""))) +
+#opts(axis.title.y = theme_text(size = 12 * 0.8, face = "bold",
+# vjust = 0.5, hjust = 1))
+#ggsave(filename = "mtbf-sim1.pdf", width = 8, height = 5, dpi = 100)
+
diff --git a/task-2911/report.tex b/task-2911/report.tex
new file mode 100644
index 0000000..4dc6ab9
--- /dev/null
+++ b/task-2911/report.tex
@@ -0,0 +1,295 @@
+\documentclass{article}
+\usepackage{url}
+\usepackage[pdftex]{graphicx}
+\usepackage{graphics}
+\usepackage{color}
+\begin{document}
+\title{An Analysis of Tor Relay Stability\\(DRAFT)}
+\author{Karsten Loesing\\{\tt karsten@xxxxxxxxxxxxxx}}
+
+\maketitle
+
+\section{Introduction}
+
+The Tor network consists of 2,200 relays and 600 bridges run by
+volunteers, some of which are on dedicated servers and some on laptops or
+mobile devices.
+% TODO Look up more recent relay and bridge numbers. -KL
+Obviously, we can expect the relays run on dedicated servers to be more
+``stable'' than those on mobile phones.
+But it is difficult to draw a line between stable and unstable relays.
+In most cases it depends on the context which relays count as stable:
+
+\begin{itemize}
+\item A stable relay that is supposed to be part of a circuit for a
+\emph{long-running stream} should not go offline during the next day.
+\item A stable relay that clients pick as \emph{entry guard} doesn't have
+to be running continuously, but should be online most of the time in the
+upcoming weeks.
+\item A stable relay that acts as \emph{hidden-service directory} should
+be part of a relay subset that mostly overlaps with the subsets 1, 2, or
+even 3 hours in the future.
+That means that the relays in this set should be stable, but also that not
+too many new relays should join the set of stable relays at once.
+\item A stable relay that clients use in a \emph{fallback consensus} that
+is already a few days or even weeks old should still be available on the
+same IP address and port.\footnote{See also proposal 146.}
+Such a relay doesn't necessarily have to run without interruption, though.
+% TODO Correctly cite proposal 146 here. -KL
+\item A stable \emph{bridge relay} should be running on the same IP
+address a few days after a client learns about the bridge, but again,
+doesn't have to run continuously.
+\end{itemize}
+
+All these stability notions have in common that some relays or bridges are
+better suited for the described contexts than others.
+In this analysis we will look at various relay stability metrics to find
+the best suited set of relays for each context.
+The idea of this report is to use the results to optimize how the
+directory authorities assign relay flags that clients use to make path
+selection decisions.
+
+For every context, we try to simulate what requirements based on past
+observations would have resulted in what relay stabilities in the near
+future.
+Generally, we'd expect that stricter requirements lead to higher
+stability.
+But every prediction contains a certain amount of randomness, so that we
+cannot tighten the requirements arbitrarily.
+Further, we want to ensure that the subset of relays identified as stable
+does not become too small.
+The reason is that there should be some diversity, so that not a few
+operators can aim at running most relays used in a given context.
+In some cases, the stable relays also need to provide sufficient bandwidth
+to the network in order not to become a performance bottleneck.
+We are going into more details about the requirements when looking into
+the separate analyses in the sections below.
+
+The analysis data and tools are available on the Tor metrics website at
+\url{https://metrics.torproject.org/}.\footnote{Or rather, will be made
+available.}
+
+\section{Choosing relays for long-lived streams}
+\label{sec:mtbf-sim}
+
+Whenever clients request Tor to open a long-lived stream, Tor should try
+to pick only those relays for the circuit that are not likely to disappear
+shortly after.
+If only a single relay in the circuit fails, the stream collapses and a
+new circuit needs to be built.
+Depending on how well the application handles connection failures this may
+impact usability significantly.
+
+In order to declare some relays as more useful for long-lived streams, the
+directory authorities track uptime sessions of all relays over time.
+Based on this history, they calculate the \emph{weighted mean time between
+failure (WMTBF)} for each relay.
+The MTBF part simply measures the average uptime between a relay showing
+up in the Tor network and either leaving or failing.
+In the weighted form of this metric, which is used here, older sessions
+are weighted to count less.
+The directory authorities assign the \texttt{Stable} flag to the 50~\% of
+relays with the highest WMTBF.
+
+In this simulation we want to find out how useful the WMTBF metric is for
+predicting future stability and how stability would be affected when
+declaring more or less than 50~\% of the relays as stable.
+The metric we chose for evaluating how stable a relay is is the \emph{time
+until next failure}.
+When running a simulation we determine the time until 10~\% of the
+``stable'' relays have failed.
+Under the (grossly simplified) assumption that relays are chosen
+uniformly, $1 - 0.9^3 = 27.1~\%$ of streams using relays from this set
+would have failed up to this point.
+
+\begin{figure}[t]
+\includegraphics[width=\textwidth]{mtbf-sim.pdf}
+\caption{Impact of assigning the \texttt{Stable} flag to a given fraction
+of relays on the actual required WMTBF ($x$ axis) and on the time
+until 10~\% of relays or 27.1~\% of streams have failed ($y$ axis)}
+\label{fig:mtbf-sim}
+\end{figure}
+
+Figure~\ref{fig:mtbf-sim} shows the analysis results for assigning the
+\texttt{Stable} flag to fractions of relays between 30~\% and 70~\% in a
+path plot.
+This path plot shows the effect of choosing a different fraction of
+relays on the actual required WMTBF value on the $x$ axis and on the
+resulting time until 10~\% of relays have failed on the $y$ axis.
+Two data points adjacent in time are connected by a line, forming a path.
+
+The results indicate a somewhat linear relation between required WMTBF and
+time until failure, which is as expected.
+The time until 10~\% of relays have failed in the default case of having
+50~\% stable relays is somewhere between 12 and 48 hours.
+If the directory authorities assigned the \texttt{Stable} flag to 60~\% or
+even 70~\% of all relays, this time would go down to on average 24 or 12
+hours.
+Reducing the set to only 40~\% or 30~\% of relays would increase the time
+until failure to 36 or even 48 hours on average.
+
+\subsubsection*{Next steps}
+
+{\it
+\begin{itemize}
+\item What's the desired stability goal here?
+\item What other requirements (bandwidth) should go into the simulation?
+\end{itemize}
+}
+
+\section{Picking stable entry guards}
+
+Clients pick a set of entry guards as fixed entry points into the Tor
+network.
+Optimally, clients should be able to stick with their choice for a few
+weeks.
+While it is not required for all their entry guards to be running all the
+time, at least a subset of them should be running, or the client needs to
+pick a new set.
+
+Tor's metric for deciding which relays are stable enough to be entry
+guards is \emph{weighted fractional uptime (WFU)}.
+WFU measures the fraction of uptime of a relay in the past with older
+observations weighted to count less.
+The assumption is that a relay that was available most of the time in the
+past will also be available most of the time in the future.
+
+In a first analysis we simulate the effect of varying the requirements for
+becoming an entry guard on the average relay stability in the future.
+We measure future stability by using the same WFU metric, but for uptime
+in the future.
+We similarly weight observations farther in the future less than
+observations in the near future.
+We then simulate different pre-defined required WFUs between $90~\%$ and
+$99.9~\%$ and calculate what the mean future WFUs would be.
+
+\begin{figure}[t]
+\includegraphics[width=\textwidth]{wfu-sim.pdf}
+\caption{Impact of different required WFU on the mean empirical future WFU
+and fraction of potential entry guards}
+\label{fig:wfu-sim}
+\end{figure}
+
+Figure~\ref{fig:wfu-sim} shows the analysis results in a path plot similar
+to the one in Section~\ref{sec:mtbf-sim}.
+This path plot shows the effect of varying the WFU requirement, displayed
+as different line colors, on the fraction of relays meeting this
+requirement on the $x$ axis and on the WFU in the future on the $y$ axis.
+Two data points adjacent in time are connected by a line, forming a path.
+
+In this graph we can see that the majority of data points for the default
+required WFU of 98~\% falls in a future WFU range of 94~\% to 96~\% with
+the smallest WFU being no less than 89~\%.
+In most cases, the fraction of relays meeting the default WFU requirement
+is between 40~\% and 50~\%.
+
+If the WFU requirement is relaxed to 95~\% or even 90~\%, the WFU in the
+future decreases slightly towards around 94~\% to 95~\% for most cases.
+At first sight it may seem surprising that a past WFU of 90~\% leads to
+a future WFU of 94~\%, but it makes sense, because the past WFU is a
+required minimum whereas the future WFU is a mean value of all relays
+meeting the requirement.
+Another effect of relaxing the required WFU is that the fraction of relays
+meeting the requirement increases from 50~\% to almost 66~\%.
+
+Interestingly, when tightening the requirement to a WFU value of 99~\% or
+even 99.9~\%, the future WFU does not increase significantly, if at all.
+To the contrary, the future WFU of relays meeting the 99.9~\% requirement
+drops to a range of 91~\% to 94~\% for quite a while.
+A likely explanation for this effect is that the fraction of relays
+meeting these high requirements is only 15~\%.
+While these 15~\% of relays may have had a very high uptime in the past,
+failure of only a few of these relays ruins the WFU metric in the future.
+
+A cautious conclusion of this analysis could be that, if the goal is to
+increase the number of \texttt{Guard} relays, reducing the required WFU to
+95~\% or even 90~\% wouldn't impact relay stability by too much.
+Conversely, increasing the required WFU beyond the current value of 98~\%
+doesn't make much sense and might even negatively affect relay stability.
+
+\subsubsection*{Next steps}
+
+{\it
+\begin{itemize}
+\item Tor penalizes relays that change their IP address or port by ending
+the running uptime session and starting a new uptime session. This
+reduces both WFU and MTBF. The simulation doesn't take this into account
+yet. Should it?
+\item Add the bandwidth requirements to the simulation. The current
+simulation doesn't make any assumptions about relay bandwidth when
+assigning \texttt{Guard} flags. Which bandwidth value would we use here?
+\item Add another graph similar to Figure~\ref{fig:wfu-sim}, but replace
+the ``Fraction of relays meeting WFU requirement'' on the \emph{x} axis
+with the ``Fraction of \emph{bandwidth} of relays meeting WFU
+requirement.''
+After all, we're interested in having enough bandwidth capacity for the
+entry guard position, not (only) in having enough distinct relays.
+Which bandwidth value would we use here?
+\item Roger suggests to come up with a better metric than ``WFU since we
+first saw a relay.''
+He says ``it seems wrong to make something that we saw earlier have a
+worse WFU than something we saw later, even if they've had identical
+uptimes in that second period.''
+What would be good candidate metrics?
+\item Ponder finding another metric than WFU for future observations. In
+particular, with the current WFU parameters of $0.95$ and $12$ hours, the
+WFU reaches up to 4 months into the future. It seems useful to weight
+uptime in the near future higher than uptime in the farther future, but
+maybe we should use parameters to limit the interval to $1$ or $2$ months.
+\end{itemize}
+}
+
+\section{Forming stable hidden-service directory sets}
+
+{\it
+In this section we should evaluate the current requirements for getting
+the \texttt{HSDir} flag.
+Also, what happened to the number of relays with the \texttt{HSDir} flag
+in August 2010?
+}
+
+\section{Selecting stable relays for a fallback consensus}
+
+{\it
+Is the concept of a fallback consensus still worth considering?
+If so, we should analyze how to identify those relays that are most likely
+to be around and reachable under the same IP address.
+The result of this analysis could lead to adding a new \texttt{Longterm}
+(or \texttt{Fallback}?) flag as suggested in proposal 146.
+% TODO Correctly cite proposal 146 here. -KL
+Maybe the analysis of bridges on stable IP addresses should come first,
+though.
+}
+
+\section{Distributing bridges with stable IP addresses}
+
+{\it
+A possible outcome of this analysis could be to add a new flag
+\texttt{StableAddress} (similar to the \texttt{Longterm} flag from the
+previous section) to bridge network statuses and to change BridgeDB to
+include at least one bridge with this flag in its results.
+One of the challenges of this analysis will be to connect sanitized bridge
+descriptors from two months with each other.
+The sanitized IP addresses of the same bridge in two months do not match,
+because we're using a new secret key as input to the hash function every
+month.
+We might be able to correlate the descriptors of running bridges via their
+descriptor publication times or bridge statistics.
+But if that fails, we'll have to run the analysis with only 1 month of
+data at a time.
+}
+
+\section{Discussion and future work}
+
+The approach taken in this analysis was to select relays that are most
+stable in a given context based on their history.
+A different angle to obtain higher relay stability might be to identify
+what properties of a relay have a positive or negative impact on its
+stability.
+For example, relays running a given operating system or given Tor software
+version might have a higher stability than others.
+Possible consequences could be to facilitate setting up relays on a given
+operating system or to improve the upgrade process of the Tor software.
+
+\end{document}
+
diff --git a/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java
new file mode 100644
index 0000000..6a2d7a9
--- /dev/null
+++ b/task-2911/wfu-sim/SimulateWeightedFractionalUptime.java
@@ -0,0 +1,314 @@
+/**
+ * Simulate variation of weighted fractional uptime on Guard relays. In
+ * a first step, parse network status consensuses in consensuses/ from last
+ * to first, calculate future weighted fractional uptimes for all relays,
+ * and write them to disk as one file per network status in
+ * fwfu/$filename. (Skip this step if there is already a fwfu/
+ * directory.) In a second step, parse the network status consensuses
+ * again, but this time from first to last, calculate past weighted
+ * fractional uptimes for all relays, form relay subsets based on minimal
+ * WFU, look up what the mean future WFU would be for a subset, and write
+ * results to wfu-sim.csv to disk. */
+import java.io.*;
+import java.text.*;
+import java.util.*;
+public class SimulateWeightedFractionalUptime {
+ public static void main(String[] args) throws Exception {
+
+ /* Measure how long this execution takes. */
+ long started = System.currentTimeMillis();
+
+ /* Decide whether we need to do the reverse run, or if we can use
+ * previous results. */
+ if (!new File("fwfu").exists()) {
+
+ /* Scan existing consensus files and sort them in reverse order. */
+ SortedSet<File> allConsensuses =
+ new TreeSet<File>(Collections.reverseOrder());
+ Stack<File> files = new Stack<File>();
+ files.add(new File("consensuses"));
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else {
+ if (file.getName().endsWith("-consensus")) {
+ allConsensuses.add(file);
+ }
+ }
+ }
+
+ /* For each relay as identified by its base-64 encoded fingerprint,
+ * track weighted uptime and total weighted time in a long[2]. */
+ SortedMap<String, long[]> knownRelays =
+ new TreeMap<String, long[]>();
+
+ /* Parse all consensuses in reverse order. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ /* Interval index = valid-after time divided by 12 hours; used below
+ * to decide how many decay steps to apply between consensuses. */
+ long nextWeightingInterval = formatter.parse(allConsensuses.first().
+ getName().substring(0, "yyyy-MM-dd-HH-mm-ss".length())).
+ getTime() / (12L * 60L * 60L * 1000L);
+ for (File consensus : allConsensuses) {
+
+ /* Every 12 hours, weight both uptime and total time of all known
+ * relays with 0.95 (or 19/20 since these are long integers) and
+ * remove all with a weighted fractional uptime below 1/10000. */
+ long validAfter = formatter.parse(consensus.getName().substring(0,
+ "yyyy-MM-dd-HH-mm-ss".length())).getTime();
+ long weightingInterval = validAfter / (12L * 60L * 60L * 1000L);
+ while (weightingInterval < nextWeightingInterval) {
+ Set<String> relaysToRemove = new HashSet<String>();
+ for (Map.Entry<String, long[]> e : knownRelays.entrySet()) {
+ long[] w = e.getValue();
+ w[0] *= 19L;
+ w[0] /= 20L;
+ w[1] *= 19L;
+ w[1] /= 20L;
+ if (((10000L * w[0]) / w[1]) < 1L) {
+ relaysToRemove.add(e.getKey());
+ }
+ }
+ for (String fingerprint : relaysToRemove) {
+ knownRelays.remove(fingerprint);
+ }
+ nextWeightingInterval -= 1L;
+ }
+
+ /* Parse all fingerprints of Running relays from the consensus. */
+ Set<String> fingerprints = new HashSet<String>();
+ BufferedReader br = new BufferedReader(new FileReader(consensus));
+ String line, rLine = null;
+ boolean reachedEnd = false;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ rLine = line;
+ } else if (line.startsWith("s ") && line.contains(" Running")) {
+ String[] parts = rLine.split(" ");
+ /* NOTE(review): the two 'continue' statements below only skip
+ * the current status line, not the whole consensus as the
+ * messages claim; the same applies in the forward pass. */
+ if (parts.length < 3) {
+ System.out.println("Illegal line '" + rLine + "' in "
+ + consensus + ". Skipping consensus.");
+ continue;
+ } else {
+ String fingerprint = parts[2];
+ /* 27 characters: presumably the unpadded base-64 form of a
+ * 20-byte relay identity digest -- TODO confirm against
+ * dir-spec. */
+ if (fingerprint.length() !=
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAA".length()) {
+ System.out.println("Illegal line '" + rLine + "' in "
+ + consensus + ". Skipping consensus.");
+ continue;
+ }
+ fingerprints.add(fingerprint);
+ }
+ } else if (line.startsWith("directory-signature ")) {
+ reachedEnd = true;
+ break;
+ }
+ }
+ br.close();
+ if (!reachedEnd) {
+ System.out.println("Did not reach the consensus end of "
+ + consensus + ". Skipping consensus.");
+ continue;
+ }
+
+ /* Increment weighted uptime for all running relays by 3600
+ * seconds. */
+ for (String fingerprint : fingerprints) {
+ if (!knownRelays.containsKey(fingerprint)) {
+ knownRelays.put(fingerprint, new long[] { 3600L, 0L });
+ } else {
+ knownRelays.get(fingerprint)[0] += 3600L;
+ }
+ }
+
+ /* Increment total weighted time for all relays by 3600 seconds.
+ * New relays get their total time here too, so w[1] is always
+ * positive before any weighting step divides by it. */
+ for (long[] w : knownRelays.values()) {
+ w[1] += 3600L;
+ }
+
+ /* Write future WFUs for all known relays to disk. */
+ File fwfuFile = new File("fwfu", consensus.getName());
+ fwfuFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(fwfuFile));
+ for (Map.Entry<String, long[]> e : knownRelays.entrySet()) {
+ bw.write(e.getKey() + " "
+ + ((10000L * e.getValue()[0]) / e.getValue()[1]) + "\n");
+ }
+ bw.close();
+ }
+ }
+
+ /* Run the simulation for the following WFU/10000 values: */
+ long[] requiredWFUs = new long[] { 9000, 9100, 9200, 9300, 9400, 9500,
+ 9600, 9700, 9750, 9800, 9850, 9900, 9950, 9975, 9990, 9999 };
+ BufferedWriter bw = new BufferedWriter(new FileWriter("wfu-sim.csv"));
+ bw.write("time");
+ for (long requiredWFU : requiredWFUs) {
+ bw.write(",wfu" + requiredWFU + ",perc85wfu" + requiredWFU
+ + ",perc90wfu" + requiredWFU + ",perc95wfu" + requiredWFU
+ + ",guards" + requiredWFU);
+ }
+ bw.write("\n");
+
+ /* Scan existing consensus files and sort them in forward order. */
+ SortedSet<File> allConsensuses = new TreeSet<File>();
+ Stack<File> files = new Stack<File>();
+ files.add(new File("consensuses"));
+ while (!files.isEmpty()) {
+ File file = files.pop();
+ if (file.isDirectory()) {
+ files.addAll(Arrays.asList(file.listFiles()));
+ } else {
+ if (file.getName().endsWith("-consensus")) {
+ allConsensuses.add(file);
+ }
+ }
+ }
+
+ /* For each relay as identified by its base-64 encoded fingerprint,
+ * track weighted uptime and total weighted time in a long[2]. */
+ SortedMap<String, long[]> knownRelays = new TreeMap<String, long[]>();
+
+ /* Parse all consensuses in forward order. */
+ SimpleDateFormat formatter = new SimpleDateFormat(
+ "yyyy-MM-dd-HH-mm-ss");
+ formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ SimpleDateFormat isoFormatter = new SimpleDateFormat(
+ "yyyy-MM-dd HH:mm:ss");
+ isoFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+ long nextWeightingInterval = formatter.parse(allConsensuses.first().
+ getName().substring(0, "yyyy-MM-dd-HH-mm-ss".length())).getTime()
+ / (12L * 60L * 60L * 1000L);
+ for (File consensus : allConsensuses) {
+
+ /* Every 12 hours, weight both uptime and total time of all known
+ * relays with 0.95 (or 19/20 since these are long integers) and
+ * remove all with a weighted fractional uptime below 1/10000. */
+ long validAfter = formatter.parse(consensus.getName().substring(0,
+ "yyyy-MM-dd-HH-mm-ss".length())).getTime();
+ long weightingInterval = validAfter / (12L * 60L * 60L * 1000L);
+ while (weightingInterval > nextWeightingInterval) {
+ Set<String> relaysToRemove = new HashSet<String>();
+ for (Map.Entry<String, long[]> e : knownRelays.entrySet()) {
+ long[] w = e.getValue();
+ w[0] *= 19L;
+ w[0] /= 20L;
+ w[1] *= 19L;
+ w[1] /= 20L;
+ if (((10000L * w[0]) / w[1]) < 1L) {
+ relaysToRemove.add(e.getKey());
+ }
+ }
+ for (String fingerprint : relaysToRemove) {
+ knownRelays.remove(fingerprint);
+ }
+ nextWeightingInterval += 1L;
+ }
+
+ /* Parse all fingerprints of Running relays from the consensus. */
+ Set<String> fingerprints = new HashSet<String>();
+ BufferedReader br = new BufferedReader(new FileReader(consensus));
+ String line, rLine = null;
+ boolean reachedEnd = false;
+ while ((line = br.readLine()) != null) {
+ if (line.startsWith("r ")) {
+ rLine = line;
+ } else if (line.startsWith("s ") && line.contains(" Running")) {
+ String[] parts = rLine.split(" ");
+ /* NOTE(review): as in the reverse pass, these 'continue'
+ * statements only skip the current status line, not the whole
+ * consensus. */
+ if (parts.length < 3) {
+ System.out.println("Illegal line '" + rLine + "' in "
+ + consensus + ". Skipping consensus.");
+ continue;
+ } else {
+ String fingerprint = parts[2];
+ if (fingerprint.length() !=
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAA".length()) {
+ System.out.println("Illegal line '" + rLine + "' in "
+ + consensus + ". Skipping consensus.");
+ continue;
+ }
+ fingerprints.add(fingerprint);
+ }
+ } else if (line.startsWith("directory-signature ")) {
+ reachedEnd = true;
+ break;
+ }
+ }
+ br.close();
+ if (!reachedEnd) {
+ System.out.println("Did not reach the consensus end of "
+ + consensus + ". Skipping consensus.");
+ continue;
+ }
+
+ /* Increment weighted uptime for all running relays by 3600
+ * seconds. */
+ for (String fingerprint : fingerprints) {
+ if (!knownRelays.containsKey(fingerprint)) {
+ knownRelays.put(fingerprint, new long[] { 3600L, 0L });
+ } else {
+ knownRelays.get(fingerprint)[0] += 3600L;
+ }
+ }
+
+ /* Increment total weighted time for all relays by 3600 seconds. */
+ for (long[] w : knownRelays.values()) {
+ w[1] += 3600L;
+ }
+
+ /* Read previously calculated future WFUs from disk. */
+ Map<String, Long> fwfus = new HashMap<String, Long>();
+ File fwfuFile = new File("fwfu", consensus.getName());
+ if (!fwfuFile.exists()) {
+ System.out.println("Could not find file " + fwfuFile
+ + ". Exiting!");
+ System.exit(1);
+ }
+ /* NOTE(review): this reader is reassigned each iteration but never
+ * closed, leaking one file handle per consensus. */
+ br = new BufferedReader(new FileReader(fwfuFile));
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(" ");
+ fwfus.put(parts[0], Long.parseLong(parts[1]));
+ }
+
+ /* Run the simulation for the relays in the current consensus for
+ * various required WFUs. */
+ bw.write(isoFormatter.format(validAfter));
+ for (long requiredWFU : requiredWFUs) {
+ long selectedRelays = 0L,
+ totalRelays = (long) fingerprints.size(), totalFwfu = 0L;
+ List<Long> fwfuList = new ArrayList<Long>();
+ for (String fingerprint : fingerprints) {
+ long[] pwfu = knownRelays.get(fingerprint);
+ long wfu = (10000L * pwfu[0]) / pwfu[1];
+ if (wfu >= requiredWFU) {
+ selectedRelays += 1L;
+ if (fwfus.containsKey(fingerprint)) {
+ long fwfu = fwfus.get(fingerprint);
+ totalFwfu += fwfu;
+ fwfuList.add(fwfu);
+ }
+ }
+ }
+ if (selectedRelays == 0L) {
+ bw.write(",NA,NA,NA,NA");
+ } else {
+ Collections.sort(fwfuList, Collections.reverseOrder());
+ /* The list is sorted in descending order, so the value at index
+ * (85 * n) / 100 is one that 85 % of the selected relays meet
+ * or exceed. NOTE(review): if no selected relay has a
+ * future-WFU entry, fwfuList is empty and get() throws --
+ * confirm this cannot happen. */
+ long perc85 = fwfuList.get((85 * fwfuList.size()) / 100);
+ long perc90 = fwfuList.get((90 * fwfuList.size()) / 100);
+ long perc95 = fwfuList.get((95 * fwfuList.size()) / 100);
+ bw.write("," + (totalFwfu / selectedRelays) + "," + perc85
+ + "," + perc90 + "," + perc95);
+ }
+ /* NOTE(review): assumes totalRelays > 0, i.e., that every parsed
+ * consensus lists at least one Running relay. */
+ bw.write("," + (10000L * selectedRelays / totalRelays));
+ }
+ bw.write("\n");
+ }
+ bw.close();
+
+ /* Print how long this execution took and exit. */
+ System.out.println("Execution took " + ((System.currentTimeMillis()
+ - started) / (60L * 1000L)) + " minutes.");
+ }
+}
+
diff --git a/task-2911/wfu-sim/wfu-sim.R b/task-2911/wfu-sim/wfu-sim.R
new file mode 100644
index 0000000..149ce6d
--- /dev/null
+++ b/task-2911/wfu-sim/wfu-sim.R
@@ -0,0 +1,57 @@
+library(ggplot2)
+data <- read.csv("wfu-sim.csv", stringsAsFactors = FALSE)
+
+# Restrict to consensuses from 2010 (string comparison on the ISO
+# timestamp column) and average all simulation columns per calendar day.
+d <- data[data$time >= '2010' & data$time < '2011', ]
+d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+# Stack one (fraction-of-relays, mean-future-WFU) series per simulated
+# WFU requirement; values in the CSV are scaled by 10000.
+d <- rbind(
+ data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"),
+ data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"),
+ data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"),
+ data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"),
+ data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %"))
+# One facet per requirement; both axes are fractions of 1.
+# NOTE(review): 'formatter = "percent"' is ggplot2 0.8-era syntax; newer
+# ggplot2 releases use labels = scales::percent instead -- confirm the
+# installed version before rerunning.
+ggplot(d, aes(x = x / 10000.0, y = y / 10000.0)) +
+geom_path() +
+facet_wrap(~ sim) +
+scale_x_continuous("\nFraction of relays meeting WFU requirement",
+ formatter = "percent") +
+scale_y_continuous("Mean WFU in the future\n", formatter = "percent")
+ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because graph is meaningless in b/w.
+#d <- data[data$time >= '2010' & data$time < '2011', ]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- rbind(
+# data.frame(x = d$guards9000, y = d$wfu9000, sim = "90 %"),
+# data.frame(x = d$guards9500, y = d$wfu9500, sim = "95 %"),
+# data.frame(x = d$guards9800, y = d$wfu9800, sim = "98 % (default)"),
+# data.frame(x = d$guards9900, y = d$wfu9900, sim = "99 %"),
+# data.frame(x = d$guards9990, y = d$wfu9990, sim = "99.9 %"))
+#ggplot(d, aes(x = x / 10000.0, y = y / 10000.0, colour = sim)) +
+#geom_path() +
+#scale_x_continuous("\nFraction of relays meeting WFU requirement",
+# formatter = "percent") +#, trans = "reverse") +
+#scale_y_continuous("Mean WFU \nin the future ",
+# formatter = "percent") +
+#scale_colour_hue("Required WFU") +
+#opts(axis.title.x = theme_text(size = 12 * 0.8, face = "bold",
+# hjust = 0.5),
+# axis.title.y = theme_text(size = 12 * 0.8, face = "bold", vjust = 0.5,
+# hjust = 1))
+#ggsave(filename = "wfu-sim.pdf", width = 8, height = 5, dpi = 100)
+
+## Commented out, because the time plot is not as useful as expected.
+#simulations <- paste("wfu", rev(c(9000, 9200, 9400, 9600, 9800)),
+# sep = "")
+#d <- data[data$time >= '2010' & data$time < '2011',
+# c("time", simulations)]
+#d <- aggregate(d[, 2:length(d)], by = list(date = as.Date(d$time)), mean)
+#d <- melt(d, id.vars = 1)
+#ggplot(d, aes(x = date, y = value / 10000.0, colour = variable)) +
+#geom_line() +
+#scale_x_date("", major = "3 months", minor = "1 month",
+# format = "%b %Y") +
+#scale_y_continuous("Empirical future WFU\n", formatter = "percent") +
+#scale_colour_hue("Required past WFU\n", breaks = simulations,
+# labels = paste(as.numeric(substr(simulations, 4, 9)) / 100.0, "%"))
+#ggsave(filename = "wfu-sim-time.pdf", width = 8, height = 5, dpi = 100)
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits