[tor-commits] [metrics-tasks/master] Update hidserv-stats extrapolation code (#13192).
commit 3c90c181a13dcc12c69e8e8fa013948b1a6405e2
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Mon Feb 9 18:25:38 2015 +0100
Update hidserv-stats extrapolation code (#13192).
---
 task-13192/.gitignore                            |   1 +
 task-13192/src/R/blog.R                          |  47 ++
 task-13192/src/R/plot.R                          | 214 +++----
 task-13192/src/java/Aggregate.java               | 129 ++++
 task-13192/src/java/Extrapolate.java             | 424 +++++++++++++
 task-13192/src/java/ExtrapolateHidServStats.java | 722 ----------------------
 task-13192/src/java/Simulate.java                | 357 +++++++++++
 7 files changed, 1066 insertions(+), 828 deletions(-)
diff --git a/task-13192/.gitignore b/task-13192/.gitignore
index 7e8bf3b..89d161b 100644
--- a/task-13192/.gitignore
+++ b/task-13192/.gitignore
@@ -4,4 +4,5 @@ in/
src/bash/
src/bin/
out/
+Rplots.pdf
diff --git a/task-13192/src/R/blog.R b/task-13192/src/R/blog.R
new file mode 100644
index 0000000..82a07e9
--- /dev/null
+++ b/task-13192/src/R/blog.R
@@ -0,0 +1,47 @@
+# Load required libraries.
+require(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
+require(scales, warn.conflicts = FALSE, quietly = TRUE)
+require(reshape, warn.conflicts = FALSE, quietly = TRUE)
+require(splines, warn.conflicts = FALSE, quietly = TRUE)
+require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
+
+# Read .csv files written by Java.
+h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
+
+# Create directories for graphs.
+dir.create(file.path("out", "graphs", "blog"), showWarnings = FALSE,
+ recursive = TRUE)
+
+# Cut off last two days, because stats might be incomplete for those.
+h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ]
+
+# Graph the number of reported stats by day.
+h7 <- data.frame(date = as.Date(h$stats_end), reports = 1)
+ggplot(h7, aes(x = date)) +
+geom_bar(colour = 'lightgray', width = .7, binwidth = 1) +
+scale_x_date("") +
+scale_y_continuous("")
+ggsave("out/graphs/blog/num-reported-stats.png", width = 10, height = 3,
+ dpi = 100)
+
+e <- read.csv("out/csv/hidserv-stats-extrapolated.csv",
+ stringsAsFactors = FALSE)
+e <- melt(e, c("date", "type"))
+e <- e[e$variable == "wiqm", ]
+e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"),
+ variable = NA, value = 0))
+
+ggplot(e[e$type == "cells", ], aes(x = as.Date(date), y = value)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "")
+ggsave("out/graphs/blog/extrapolated-cells.png", width = 10,
+ height = 3, dpi = 100)
+
+ggplot(e[e$type != "cells", ], aes(x = as.Date(date), y = value)) +
+geom_line() +
+scale_x_date(name = "") +
+scale_y_continuous(name = "")
+ggsave("out/graphs/blog/extrapolated-onions.png", width = 10,
+ height = 3, dpi = 100)
+
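Both plots above use only the "wiqm" (weighted interquartile mean) column of out/csv/hidserv-stats-extrapolated.csv, which Aggregate.java below writes with the header date,type,wmean,wmedian,wiqm. As a minimal sketch of reading that series outside of R; the class name PrintWiqm is hypothetical and not part of the repository:

    import java.io.BufferedReader;
    import java.io.FileReader;

    // Hypothetical helper: print the weighted-interquartile-mean series
    // for rendezvous cells, i.e. the values that blog.R plots.
    public class PrintWiqm {
      public static void main(String[] args) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(
            "out/csv/hidserv-stats-extrapolated.csv"));
        String line = br.readLine(); /* skip the header line */
        while ((line = br.readLine()) != null) {
          String[] parts = line.split(",");
          if (parts[1].equals("cells")) {
            System.out.println(parts[0] + " " + parts[4]); /* date, wiqm */
          }
        }
        br.close();
      }
    }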
diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R
index 991928b..552b810 100644
--- a/task-13192/src/R/plot.R
+++ b/task-13192/src/R/plot.R
@@ -5,17 +5,12 @@ require(reshape, warn.conflicts = FALSE, quietly = TRUE)
require(splines, warn.conflicts = FALSE, quietly = TRUE)
require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
-# Avoid scientific notation.
-options(scipen = 15)
-
# Read .csv file written by Java.
h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
# Create directories for graphs.
dir.create(file.path("out", "graphs", "report"), showWarnings = FALSE,
recursive = TRUE)
-dir.create(file.path("out", "graphs", "slides"), showWarnings = FALSE,
- recursive = TRUE)
# Cut off last two days, because stats might be incomplete for those.
h <- h[as.Date(h$stats_end) < max(as.Date(h$stats_end) - 1), ]
@@ -28,17 +23,15 @@ scale_x_date("") +
scale_y_continuous("")
ggsave("out/graphs/report/num-reported-stats.pdf", width = 10, height = 3,
dpi = 100)
-ggsave("out/graphs/slides/hidserv-12.png", width = 8, height = 3,
- dpi = 100)
# Graph distributions of reported values by day.
h1 <- data.frame(date = as.Date(h$stats_end),
- traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+ traffic = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6),
services = h$hidserv_dir_onions_seen)
h1 <- melt(h1, "date")
h1 <- data.frame(date = h1$date,
- variable = ifelse(h1$variable == "traffic", "traffic in MB/s",
- ".onion addresses"), value = h1$value)
+ variable = ifelse(h1$variable == "traffic", "traffic in Mbit/s",
+ "unique .onion addresses"), value = h1$value)
ggplot(h1, aes(x = date, y = value, group = date)) +
geom_boxplot() +
facet_grid(variable ~ ., scales = "free_y") +
@@ -49,23 +42,22 @@ ggsave("out/graphs/report/stats-by-day.pdf", width = 10, height = 5,
# Graph distributions of calculated fractions by day.
h2 <- data.frame(date = as.Date(h$stats_end),
- prob_rend_point = h$prob_rend_point,
- x_frac_hsdesc = h$frac_hsdesc / 3.0)
+ frac_rend_relayed_cells = h$frac_rend_relayed_cells,
+ x_frac_dir_onions_seen = h$frac_dir_onions_seen)
h2 <- melt(h2, "date")
h2 <- data.frame(date = h2$date,
- variable = ifelse(h2$variable == "prob_rend_point",
- "selected as rendezvous point", "responsible for a descriptor"),
+ variable = ifelse(h2$variable == "frac_rend_relayed_cells",
+ "cells on rendezvous circuits", "hidden-service descriptors"),
value = h2$value)
ggplot(h2, aes(x = date, y = value, group = date)) +
geom_boxplot() +
facet_grid(variable ~ ., scales = "free_y") +
scale_x_date("") +
-scale_y_continuous("Calculated probabilities\n", labels = percent)
+scale_y_continuous("Calculated network fractions\n", labels = percent)
ggsave("out/graphs/report/probs-by-relay.pdf", width = 10, height = 5,
dpi = 100)
# Graph ECDF of cells reported by relays with rend point probability of 0.
-h8 <- h[h$prob_rend_point == 0,
+h8 <- h[h$frac_rend_relayed_cells == 0,
"hidserv_rend_relayed_cells" ]
h8 <- sort(h8)
h8 <- data.frame(x = h8, y = (1:length(h8)) / length(h8))
@@ -75,13 +67,14 @@ laplace_cells <- function(x) {
ggplot(h8, aes(x = x, y = y)) +
geom_line() +
stat_function(fun = laplace_cells, colour = "blue") +
-scale_x_continuous("\nReported cells on rendezvous circuits") +
-scale_y_continuous("Cumulative probability\n")
+scale_x_continuous("\nReported cells on rendezvous circuits",
+ limits = c(max(h8[h8$y < 0.01, "x"]), min(h8[h8$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3,
dpi = 100)
# Graph ECDF of .onions reported by relays with HSDir probability of 0.
-h9 <- h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"]
+h9 <- h[h$frac_dir_onions_seen == 0, "hidserv_dir_onions_seen"]
h9 <- sort(h9)
h9 <- data.frame(x = h9, y = (1:length(h9)) / length(h9))
laplace_onions <- function(x) {
@@ -90,51 +83,54 @@ laplace_onions <- function(x) {
ggplot(h9, aes(x = x, y = y)) +
geom_line() +
stat_function(fun = laplace_onions, colour = "blue") +
-scale_x_continuous("\nReported .onion addresses") +
-scale_y_continuous("Cumulative probability\n")
+scale_x_continuous("\nReported .onion addresses",
+ limits = c(max(h9[h9$y < 0.01, "x"]), min(h9[h9$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3,
dpi = 100)
# Graph correlation between reports and fractions per relay.
h3 <- rbind(
- data.frame(x = h$frac_hsdesc / 3.0,
- y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen),
+ data.frame(x = h$frac_dir_onions_seen,
+ y = ifelse(h$frac_dir_onions_seen == 0, NA, h$hidserv_dir_onions_seen),
facet = ".onion addresses"),
- data.frame(x = h$prob_rend_point,
- y = ifelse(h$prob_rend_point == 0, NA,
- h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)),
- facet = "traffic in kB/s"))
+ data.frame(x = h$frac_rend_relayed_cells,
+ y = ifelse(h$frac_rend_relayed_cells == 0, NA,
+ h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6)),
+ facet = "traffic in Mbit/s"))
ggplot(h3[h3$facet == ".onion addresses", ], aes(x = x, y = y)) +
geom_point(alpha = 0.5) +
stat_smooth(method = "lm") +
-scale_x_continuous(name = "\nProbability", labels = percent) +
+scale_x_continuous(name = "\nFraction", labels = percent) +
scale_y_continuous(name = "Reported .onion addresses\n")
ggsave("out/graphs/report/corr-probs-onions-by-relay.pdf", width = 5,
height = 3, dpi = 100)
-ggplot(h3[h3$facet == "traffic in kB/s", ], aes(x = x, y = y)) +
+ggplot(h3[h3$facet == "traffic in Mbit/s", ], aes(x = x, y = y)) +
geom_point(alpha = 0.5) +
stat_smooth(method = "lm") +
-scale_x_continuous(name = "\nProbability", labels = percent) +
-scale_y_continuous(name = "Reported traffic in kB/s\n")
+scale_x_continuous(name = "\nFraction", labels = percent) +
+scale_y_continuous(name = "Reported traffic in Mbit/s\n")
ggsave("out/graphs/report/corr-probs-cells-by-relay.pdf", width = 5,
height = 3, dpi = 100)
# Graph correlation between reports and fractions per day.
h5 <- rbind(
data.frame(date = as.Date(h$stats_end),
- prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0),
+ prob = ifelse(h$frac_dir_onions_seen == 0, NA, h$frac_dir_onions_seen),
reported = h$hidserv_dir_onions_seen, facet = "published descriptor"),
data.frame(date = as.Date(h$stats_end),
- prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point),
- reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
- facet = "traffic in MB/s"))
+ prob = ifelse(h$frac_rend_relayed_cells == 0, NA,
+ h$frac_rend_relayed_cells),
+ reported = h$hidserv_rend_relayed_cells * 512 * 8 / (86400 * 1e6),
+ facet = "traffic in Mbit/s"))
h5 <- na.omit(h5)
h5 <- aggregate(list(prob = h5$prob, reported = h5$reported),
by = list(date = h5$date, facet = h5$facet), FUN = sum)
-ggplot(h5[h5$facet == "traffic in MB/s", ], aes(x = prob, y = reported)) +
+ggplot(h5[h5$facet == "traffic in Mbit/s", ],
+ aes(x = prob, y = reported)) +
geom_point(alpha = 0.5) +
scale_x_continuous(name = "\nTotal probability", labels = percent) +
-scale_y_continuous(name = "Total traffic in MB/s\n") +
+scale_y_continuous(name = "Total traffic in Mbit/s\n") +
stat_smooth(method = "lm") +
geom_vline(xintercept = 0.01, linetype = 2)
ggsave("out/graphs/report/corr-probs-cells-by-day.pdf", width = 5,
@@ -149,98 +145,104 @@ geom_vline(xintercept = 0.01, linetype = 2)
ggsave("out/graphs/report/corr-probs-onions-by-day.pdf", width = 5,
height = 3, dpi = 100)
+# Graph ECDF of extrapolated cells.
+h20 <- h[h$frac_rend_relayed_cells > 0, ]
+h20 <- h20$hidserv_rend_relayed_cells *
+ 512 * 8 / (86400 * 1e6 * h20$frac_rend_relayed_cells)
+h20 <- sort(h20)
+h20 <- data.frame(x = h20, y = (1:length(h20)) / length(h20))
+ggplot(h20, aes(x = x, y = y)) +
+geom_line() +
+scale_x_continuous("\nExtrapolated total traffic in Mbit/s",
+ limits = c(max(h20[h20$y < 0.01, "x"]), min(h20[h20$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
+ggsave("out/graphs/report/extrapolated-cells.pdf", width = 5, height = 3,
+ dpi = 100)
+
+# Graph ECDF of extrapolated .onions.
+h21 <- h[h$frac_dir_onions_seen > 0, ]
+h21 <- h21$hidserv_dir_onions_seen / (12 * h21$frac_dir_onions_seen)
+h21 <- sort(h21)
+h21 <- data.frame(x = h21, y = (1:length(h21)) / length(h21))
+ggplot(h21, aes(x = x, y = y)) +
+geom_line() +
+scale_x_continuous("\nExtrapolated total .onion addresses",
+ limits = c(max(h21[h21$y < 0.01, "x"]), min(h21[h21$y > 0.99, "x"]))) +
+scale_y_continuous("Cumulative probability\n", labels = percent)
+ggsave("out/graphs/report/extrapolated-onions.pdf", width = 5, height = 3,
+ dpi = 100)
+
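Both extrapolations above divide a relay's report by its calculated network fraction and fix up units: a cell is 512 bytes and a report covers 86400 seconds, so cells * 512 * 8 / (86400 * 1e6 * frac) yields total Mbit/s, and reported .onion addresses are divided by 12 * frac because every service publishes 4 replicas of its descriptor to 3 directories each, so it can be counted up to 12 times. A minimal sketch with made-up report values (all numbers hypothetical):

    // Toy extrapolation mirroring the two formulas above; the report
    // values and fractions are made up for illustration.
    public class ToyExtrapolation {
      public static void main(String[] args) {
        long reportedCells = 86400000L;  /* made-up daily cell count */
        double fracCells = 0.02;         /* relay's network fraction */
        double mbits = reportedCells * 512.0 * 8.0
            / (86400.0 * 1e6 * fracCells);
        System.out.printf("extrapolated traffic: %.1f Mbit/s%n", mbits);
        long reportedOnions = 240;       /* made-up .onion count */
        double fracOnions = 0.001;       /* fraction of descriptor space */
        double onions = reportedOnions / (12.0 * fracOnions);
        System.out.printf("extrapolated .onions: %.0f%n", onions);
      }
    }

With these numbers the sketch prints 204.8 Mbit/s and 20000 .onions.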
# Graph extrapolated network totals.
-h6 <- data.frame(date = as.Date(h$stats_end),
- traffic = ifelse(h$prob_rend_point == 0, 0,
- h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)),
- prob_rend_point = h$prob_rend_point,
- onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen),
- prob_onion = h$frac_hsdesc * 4.0)
-h6 <- aggregate(list(traffic = h6$traffic,
- prob_rend_point = h6$prob_rend_point,
- onions = h6$onions,
- prob_onion = h6$prob_onion), by = list(date = h6$date), FUN = sum)
-h6 <- data.frame(date = h6$date,
- traffic = ifelse(h6$prob_rend_point < 0.01, 0,
- h6$traffic / h6$prob_rend_point),
- onions = ifelse(h6$prob_onion / 12.0 < 0.01, 0,
- h6$onions / h6$prob_onion))
-h6 <- melt(h6, "date")
-h6 <- h6[h6$value > 0, ]
-h6 <- rbind(h6, data.frame(date = NA, variable = c('traffic', 'onions'),
- value = 0))
-h6 <- data.frame(date = h6$date,
- variable = ifelse(h6$variable == "traffic", "total traffic in MB/s",
- ".onion addresses"), value = h6$value)
-ggplot(h6, aes(date, value)) +
-facet_grid(variable ~ ., scales = "free_y") +
-geom_point() +
-stat_smooth() +
+e <- read.csv("out/csv/hidserv-stats-extrapolated.csv",
+ stringsAsFactors = FALSE)
+e <- melt(e, c("date", "type"))
+e <- e[e$variable == "wiqm", ]
+e <- rbind(e, data.frame(date = NA, type = c("onions", "cells"),
+ variable = NA, value = 0))
+e <- data.frame(e, label = ifelse(e$type == "cells", "traffic in Mbit/s",
+ "unique .onion addresses"))
+ggplot(e, aes(x = as.Date(date), y = value)) +
+geom_line() +
+facet_grid(label ~ ., scales = "free_y") +
scale_x_date(name = "") +
scale_y_continuous(name = "Extrapolated network totals\n")
ggsave("out/graphs/report/extrapolated-network-totals.pdf", width = 10,
height = 5, dpi = 100)
-# Graph extrapolated number of .onion addresses.
-h11 <- h6[h6$variable == ".onion addresses", ]
-ggplot(h11, aes(x = date, y = value)) +
-geom_point() +
-stat_smooth() +
-scale_x_date(name = "") +
-scale_y_continuous(name = "")
-ggsave("out/graphs/slides/hidserv-13.png", width = 8, height = 3,
- dpi = 100)
-
-# Graph extrapolated fraction of hidden-service traffic.
-b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE)
-b <- b[b$isexit == '' & b$isguard == '' & b$date > '2014-12-20', ]
-h10 <- data.frame(date = as.Date(h$stats_end),
- traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
- prob_rend_point = h$prob_rend_point)
-h10 <- aggregate(list(traffic = h10$traffic,
- prob_rend_point = h10$prob_rend_point), by = list(date = h10$date),
- FUN = sum)
-h10 <- data.frame(date = h10$date,
- traffic = ifelse(h10$prob_rend_point < 0.01, 0,
- h10$traffic / h10$prob_rend_point))
-h10 <- melt(h10, "date")
-h10 <- h10[h10$value > 0, ]
-h10 <- rbind(h10, data.frame(date = as.Date(b$date), variable = "bw",
- value = b$bwread + b$bwwrite))
-h10 <- cast(h10, date ~ variable, value = "value")
-h10 <- na.omit(h10)
-h10 <- data.frame(date = h10$date,
- value = h10$traffic * 1000 * 1000 / h10$bw)
-h10 <- rbind(h10, data.frame(date = NA, value = 0))
-ggplot(h10, aes(x = date, y = value)) +
-geom_point() +
-scale_x_date(name = "") +
-scale_y_continuous(name = "", labels = percent) +
-stat_smooth()
-ggsave("out/graphs/slides/hidserv-14.png", width = 8, height = 3,
+# Graph distributions of calculated fractions by day.
+h71 <- data.frame(date = as.Date(h$stats_end),
+ frac_rend_relayed_cells = h$frac_rend_relayed_cells,
+ frac_dir_onions_seen = h$frac_dir_onions_seen)
+summary(h71)
+h71 <- aggregate(list(
+ frac_rend_relayed_cells = h71$frac_rend_relayed_cells,
+ frac_dir_onions_seen = h71$frac_dir_onions_seen),
+ by = list(date = h71$date), FUN = sum)
+summary(h71)
+h71 <- melt(h71, "date")
+summary(h71)
+h71 <- data.frame(date = h71$date,
+ variable = ifelse(h71$variable == "frac_rend_relayed_cells",
+ "cells on rendezvous circuits", "hidden-service descriptors"),
+ value = h71$value)
+ggplot(h71, aes(x = date, y = value)) +
+geom_line() +
+facet_grid(variable ~ ., scales = "free_y") +
+geom_hline(yintercept = 0.01, linetype = 2) +
+scale_x_date("") +
+scale_y_continuous("Total calculated network fractions per day\n",
+ labels = percent)
+ggsave("out/graphs/report/probs-by-day.pdf", width = 10, height = 5,
dpi = 100)
# Graph simulation results for cells on rendezvous circuits.
s <- read.csv("out/csv/sim-cells.csv")
-ggplot(s, aes(x = frac, y = (p500 - 1e10) / 1e10,
- ymin = (p025 - 1e10) / 1e10, ymax = (p975 - 1e10) / 1e10)) +
+s <- do.call(data.frame, aggregate(list(X = s$wiqm),
+ by = list(frac = s$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975)))
+ggplot(s, aes(x = frac, y = (X.50. - 1e10) / 1e10,
+ ymin = (X.2.5. - 1e10) / 1e10, ymax = (X.97.5. - 1e10) / 1e10)) +
geom_line() +
geom_ribbon(alpha = 0.2) +
scale_x_continuous("\nRendezvous points included in extrapolation",
labels = percent) +
-scale_y_continuous("Deviation from network totals\n", labels = percent)
+scale_y_continuous("Deviation from actual\nhidden-service traffic\n",
+ labels = percent)
ggsave("out/graphs/report/sim-cells.pdf", width = 5, height = 3,
dpi = 100)
# Graph simulation results for .onion addresses.
o <- read.csv("out/csv/sim-onions.csv")
-ggplot(o, aes(x = frac, y = (p500 - 40000) / 40000,
- ymin = (p025 - 40000) / 40000, ymax = (p975 - 40000) / 40000)) +
+o <- do.call(data.frame, aggregate(list(X = o$wiqm),
+ by = list(frac = o$frac), FUN = quantile, probs = c(0.025, 0.5, 0.975)))
+ggplot(o, aes(x = frac, y = (X.50. / 12 - 40000) / 40000,
+ ymin = (X.2.5. / 12 - 40000) / 40000,
+ ymax = (X.97.5. / 12 - 40000) / 40000)) +
geom_line() +
geom_ribbon(alpha = 0.2) +
scale_x_continuous("\nDirectories included in extrapolation",
labels = percent) +
-scale_y_continuous("Deviation from network totals\n", labels = percent)
+scale_y_continuous("Deviation from actual\nnumber of .onions\n",
+ labels = percent)
ggsave("out/graphs/report/sim-onions.pdf", width = 5, height = 3,
dpi = 100)
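The two simulation plots summarize many simulated extrapolations of a known ground truth (1e10 daily cells, and 40000 .onion addresses that are each counted 12 times) as the 2.5th, 50th, and 97.5th percentiles of relative deviation. A minimal sketch of just that summary step; the noise model is made up purely to have a distribution to summarize, and the percentile indexing follows the removed ExtrapolateHidServStats code below:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import java.util.Random;

    // Summarize simulated extrapolations of a known truth as relative
    // deviations at the 2.5th/50th/97.5th percentiles.
    public class ToyDeviation {
      public static void main(String[] args) {
        final double truth = 1e10;
        Random rnd = new Random(3);
        List<Double> extrapolations = new ArrayList<Double>();
        for (int i = 0; i < 10000; i++) {
          /* made-up noise model, not the simulation's */
          extrapolations.add(truth * (1.0 + 0.05 * rnd.nextGaussian()));
        }
        Collections.sort(extrapolations);
        int n = extrapolations.size();
        double p025 = extrapolations.get((n * 25) / 1000),
            p500 = extrapolations.get((n * 500) / 1000),
            p975 = extrapolations.get((n * 975) / 1000);
        System.out.printf("%.4f,%.4f,%.4f%n", (p025 - truth) / truth,
            (p500 - truth) / truth, (p975 - truth) / truth);
      }
    }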
diff --git a/task-13192/src/java/Aggregate.java b/task-13192/src/java/Aggregate.java
new file mode 100644
index 0000000..56bac2c
--- /dev/null
+++ b/task-13192/src/java/Aggregate.java
@@ -0,0 +1,129 @@
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+
+public class Aggregate {
+
+ private static File hidservStatsCsvFile =
+ new File("out/csv/hidserv-stats.csv");
+
+ private static File hidservStatsExtrapolatedCsvFile =
+ new File("out/csv/hidserv-stats-extrapolated.csv");
+
+ public static void main(String[] args) throws Exception {
+ aggregate();
+ }
+
+ private static final DateFormat DATE_TIME_FORMAT, DATE_FORMAT;
+
+ static {
+ DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ DATE_TIME_FORMAT.setLenient(false);
+ DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+ DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
+ DATE_FORMAT.setLenient(false);
+ DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private static void aggregate() throws Exception {
+ if (!hidservStatsCsvFile.exists() ||
+ hidservStatsCsvFile.isDirectory()) {
+ System.err.println("Unable to read "
+ + hidservStatsCsvFile.getAbsolutePath() + ". Exiting.");
+ System.exit(1);
+ }
+ SortedMap<String, List<double[]>>
+ extrapolatedCells = new TreeMap<String, List<double[]>>(),
+ extrapolatedOnions = new TreeMap<String, List<double[]>>();
+ BufferedReader br = new BufferedReader(new FileReader(
+ hidservStatsCsvFile));
+ String line = br.readLine();
+ while ((line = br.readLine()) != null) {
+ String[] parts = line.split(",");
+ String date = DATE_FORMAT.format(DATE_TIME_FORMAT.parse(parts[2]));
+ double hidservRendRelayedCells = Double.parseDouble(parts[3]),
+ hidservDirOnionsSeen = Double.parseDouble(parts[4]),
+ fracRendRelayedCells = Double.parseDouble(parts[5]),
+ fracDirOnionsSeen = Double.parseDouble(parts[6]);
+
+ if (fracRendRelayedCells > 0.0) {
+ if (!extrapolatedCells.containsKey(date)) {
+ extrapolatedCells.put(date, new ArrayList<double[]>());
+ }
+ extrapolatedCells.get(date).add(new double[] {
+ hidservRendRelayedCells * 512.0 * 8.0
+ / (86400.0 * 1000000.0 * fracRendRelayedCells),
+ fracRendRelayedCells });
+ }
+ if (fracDirOnionsSeen > 0.0) {
+ if (!extrapolatedOnions.containsKey(date)) {
+ extrapolatedOnions.put(date, new ArrayList<double[]>());
+ }
+ extrapolatedOnions.get(date).add(new double[] {
+ hidservDirOnionsSeen / (12.0 * fracDirOnionsSeen),
+ fracDirOnionsSeen });
+ }
+ }
+ br.close();
+ hidservStatsExtrapolatedCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ hidservStatsExtrapolatedCsvFile));
+ bw.write("date,type,wmean,wmedian,wiqm\n");
+ for (int i = 0; i < 2; i++) {
+ String type = i == 0 ? "cells" : "onions";
+ SortedMap<String, List<double[]>> extrapolated = i == 0
+ ? extrapolatedCells : extrapolatedOnions;
+ for (Map.Entry<String, List<double[]>> e :
+ extrapolated.entrySet()) {
+ String date = e.getKey();
+ List<double[]> weightedValues = e.getValue();
+ double totalFrac = 0.0;
+ for (double[] d : weightedValues) {
+ totalFrac += d[1];
+ }
+ if (totalFrac < 0.01) {
+ continue;
+ }
+ Collections.sort(weightedValues,
+ new Comparator<double[]>() {
+ public int compare(double[] o1, double[] o2) {
+ return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+ }
+ });
+ double totalWeighted = 0.0, totalProbability = 0.0;
+ double totalInterquartileFrac = 0.0,
+ totalWeightedInterquartile = 0.0;
+ Double weightedMedian = null;
+ for (double[] d : weightedValues) {
+ totalWeighted += d[0] * d[1];
+ totalProbability += d[1];
+ if (weightedMedian == null &&
+ totalProbability > totalFrac * 0.5) {
+ weightedMedian = d[0];
+ }
+ if (totalProbability >= totalFrac * 0.25 &&
+ totalProbability - d[1] <= totalFrac * 0.75) {
+ totalWeightedInterquartile += d[0] * d[1];
+ totalInterquartileFrac += d[1];
+ }
+ }
+ bw.write(String.format("%s,%s,%.0f,%.0f,%.0f%n", date, type,
+ totalWeighted / totalProbability, weightedMedian,
+ totalWeightedInterquartile / totalInterquartileFrac));
+ }
+ }
+ bw.close();
+ }
+}
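Note that the interquartile logic above trims by reported network fraction, not by number of reports: after sorting extrapolations by value, it keeps the span between 25% and 75% of the total fraction and averages it weighted by fraction. A toy run on hypothetical (extrapolated value, fraction) pairs, following the same loop:

    import java.util.Arrays;
    import java.util.Comparator;

    // Weighted interquartile mean of made-up
    // (extrapolated value, network fraction) pairs.
    public class ToyWiqm {
      public static void main(String[] args) {
        double[][] weightedValues = {
            { 300.0, 0.02 }, { 100.0, 0.01 }, { 120.0, 0.04 },
            { 90.0, 0.03 }, { 500.0, 0.01 } };
        Arrays.sort(weightedValues, new Comparator<double[]>() {
          public int compare(double[] o1, double[] o2) {
            return Double.compare(o1[0], o2[0]);
          }
        });
        double totalFrac = 0.0;
        for (double[] d : weightedValues) {
          totalFrac += d[1];
        }
        double totalProbability = 0.0, weighted = 0.0, frac = 0.0;
        for (double[] d : weightedValues) {
          totalProbability += d[1];
          if (totalProbability >= totalFrac * 0.25 &&
              totalProbability - d[1] <= totalFrac * 0.75) {
            weighted += d[0] * d[1];
            frac += d[1];
          }
        }
        System.out.printf("wiqm = %.1f%n", weighted / frac); /* 145.0 */
      }
    }

Weighting by fraction gives relays that observed more of the network correspondingly more influence on the daily estimate.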
diff --git a/task-13192/src/java/Extrapolate.java b/task-13192/src/java/Extrapolate.java
new file mode 100644
index 0000000..29ff518
--- /dev/null
+++ b/task-13192/src/java/Extrapolate.java
@@ -0,0 +1,424 @@
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.math.BigInteger;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+public class Extrapolate {
+
+ private static File archiveExtraInfosDirectory =
+ new File("in/collector/archive/relay-descriptors/extra-infos/");
+
+ private static File recentExtraInfosDirectory =
+ new File("in/collector/recent/relay-descriptors/extra-infos/");
+
+ private static File archiveConsensuses =
+ new File("in/collector/archive/relay-descriptors/consensuses/");
+
+ private static File recentConsensuses =
+ new File("in/collector/recent/relay-descriptors/consensuses/");
+
+ private static File hidservStatsCsvFile =
+ new File("out/csv/hidserv-stats.csv");
+
+ public static void main(String[] args) throws Exception {
+ System.out.println("Extracting hidserv-* lines from extra-info "
+ + "descriptors...");
+ SortedMap<String, SortedSet<HidServStats>> hidServStats =
+ extractHidServStats();
+ System.out.println("Extracting fractions from consensuses...");
+ SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions =
+ extractConsensusFractions(hidServStats.keySet());
+ System.out.println("Extrapolating statistics...");
+ extrapolateHidServStats(hidServStats, consensusFractions);
+ System.out.println(new Date() + " Terminating.");
+ }
+
+ private static final DateFormat DATE_TIME_FORMAT;
+
+ static {
+ DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ DATE_TIME_FORMAT.setLenient(false);
+ DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private static class HidServStats implements Comparable<HidServStats> {
+
+ /* Hidden-service statistics end timestamp in milliseconds. */
+ private long statsEndMillis;
+
+ /* Statistics interval length in seconds. */
+ private long statsIntervalSeconds;
+
+ /* Number of relayed cells reported by the relay and adjusted by
+ * rounding to the nearest right side of a bin and subtracting half of
+ * the bin size. */
+ private long rendRelayedCells;
+
+ /* Number of .onions reported by the relay and adjusted by rounding to
+ * the nearest right side of a bin and subtracting half of the bin
+ * size. */
+ private long dirOnionsSeen;
+
+ private HidServStats(long statsEndMillis, long statsIntervalSeconds,
+ long rendRelayedCells, long dirOnionsSeen) {
+ this.statsEndMillis = statsEndMillis;
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ this.rendRelayedCells = rendRelayedCells;
+ this.dirOnionsSeen = dirOnionsSeen;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof HidServStats)) {
+ return false;
+ }
+ HidServStats other = (HidServStats) otherObject;
+ return this.statsEndMillis == other.statsEndMillis &&
+ this.statsIntervalSeconds == other.statsIntervalSeconds &&
+ this.rendRelayedCells == other.rendRelayedCells &&
+ this.dirOnionsSeen == other.dirOnionsSeen;
+ }
+
+ @Override
+ public int compareTo(HidServStats other) {
+ return this.statsEndMillis < other.statsEndMillis ? -1 :
+ this.statsEndMillis > other.statsEndMillis ? 1 : 0;
+ }
+ }
+
+ /* Extract fingerprint and hidserv-* lines from extra-info descriptors
+ * located in in/{archive,recent}/relay-descriptors/extra-infos/. */
+ private static SortedMap<String, SortedSet<HidServStats>>
+ extractHidServStats() {
+ SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
+ new TreeMap<String, SortedSet<HidServStats>>();
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(archiveExtraInfosDirectory);
+ descriptorReader.addDirectory(recentExtraInfosDirectory);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof ExtraInfoDescriptor)) {
+ continue;
+ }
+ String fingerprint =
+ ((ExtraInfoDescriptor) descriptor).getFingerprint();
+ Scanner scanner = new Scanner(new ByteArrayInputStream(
+ descriptor.getRawDescriptorBytes()));
+ Long statsEndMillis = null, statsIntervalSeconds = null,
+ rendRelayedCells = null, dirOnionsSeen = null;
+ try {
+ while (scanner.hasNext()) {
+ String line = scanner.nextLine();
+ if (line.startsWith("hidserv-")) {
+ String[] parts = line.split(" ");
+ if (parts[0].equals("hidserv-stats-end")) {
+ if (parts.length != 5 || !parts[3].startsWith("(") ||
+ !parts[4].equals("s)")) {
+ /* Will warn below, because statsEndMillis and
+ * statsIntervalSeconds are still null. */
+ continue;
+ }
+ statsEndMillis = DATE_TIME_FORMAT.parse(
+ parts[1] + " " + parts[2]).getTime();
+ statsIntervalSeconds =
+ Long.parseLong(parts[3].substring(1));
+ } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+ if (parts.length != 5 ||
+ !parts[4].startsWith("bin_size=")) {
+ /* Will warn below, because rendRelayedCells is still
+ * null. */
+ continue;
+ }
+ rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
+ Long.parseLong(parts[4].substring(9)));
+ } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+ if (parts.length != 5 ||
+ !parts[4].startsWith("bin_size=")) {
+ /* Will warn below, because dirOnionsSeen is still
+ * null. */
+ continue;
+ }
+ dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
+ Long.parseLong(parts[4].substring(9)));
+ }
+ }
+ }
+ } catch (ParseException e) {
+ e.printStackTrace();
+ continue;
+ } catch (NumberFormatException e) {
+ e.printStackTrace();
+ continue;
+ }
+ if (statsEndMillis == null && statsIntervalSeconds == null &&
+ rendRelayedCells == null && dirOnionsSeen == null) {
+ continue;
+ } else if (statsEndMillis != null && statsIntervalSeconds != null
+ && rendRelayedCells != null && dirOnionsSeen != null) {
+ if (!extractedHidServStats.containsKey(fingerprint)) {
+ extractedHidServStats.put(fingerprint,
+ new TreeSet<HidServStats>());
+ }
+ extractedHidServStats.get(fingerprint).add(new HidServStats(
+ statsEndMillis, statsIntervalSeconds, rendRelayedCells,
+ dirOnionsSeen));
+ } else {
+ System.err.println("Relay " + fingerprint + " published "
+ + "incomplete hidserv-stats. Ignoring.");
+ }
+ }
+ }
+ return extractedHidServStats;
+ }
+
+ private static long removeNoise(long reportedNumber, long binSize) {
+ long roundedToNearestRightSideOfTheBin =
+ ((reportedNumber + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ return subtractedHalfOfBinSize;
+ }
+
+ private static class ConsensusFraction
+ implements Comparable<ConsensusFraction> {
+
+ /* Valid-after timestamp of the consensus in milliseconds. */
+ private long validAfterMillis;
+
+ /* Fresh-until timestamp of the consensus in milliseconds. */
+ private long freshUntilMillis;
+
+ /* Probability for being selected by clients as rendezvous point. */
+ private double probabilityRendezvousPoint;
+
+ /* Probability for being selected as directory: the fraction of
+ * descriptor identifiers that this relay has been responsible for,
+ * divided by 3. */
+ private double fractionResponsibleDescriptors;
+
+ private ConsensusFraction(long validAfterMillis,
+ long freshUntilMillis,
+ double probabilityRendezvousPoint,
+ double fractionResponsibleDescriptors) {
+ this.validAfterMillis = validAfterMillis;
+ this.freshUntilMillis = freshUntilMillis;
+ this.probabilityRendezvousPoint = probabilityRendezvousPoint;
+ this.fractionResponsibleDescriptors =
+ fractionResponsibleDescriptors;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ConsensusFraction)) {
+ return false;
+ }
+ ConsensusFraction other = (ConsensusFraction) otherObject;
+ return this.validAfterMillis == other.validAfterMillis &&
+ this.freshUntilMillis == other.freshUntilMillis &&
+ this.fractionResponsibleDescriptors ==
+ other.fractionResponsibleDescriptors &&
+ this.probabilityRendezvousPoint ==
+ other.probabilityRendezvousPoint;
+ }
+
+ @Override
+ public int compareTo(ConsensusFraction other) {
+ return this.validAfterMillis < other.validAfterMillis ? -1 :
+ this.validAfterMillis > other.validAfterMillis ? 1 : 0;
+ }
+ }
+
+ /* Extract fractions that relays were responsible for from consensuses
+ * located in in/{archive,recent}/relay-descriptors/consensuses/. */
+ private static SortedMap<String, SortedSet<ConsensusFraction>>
+ extractConsensusFractions(Collection<String> fingerprints) {
+ SortedMap<String, SortedSet<ConsensusFraction>>
+ extractedConsensusFractions =
+ new TreeMap<String, SortedSet<ConsensusFraction>>();
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(archiveConsensuses);
+ descriptorReader.addDirectory(recentConsensuses);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+ continue;
+ }
+ RelayNetworkStatusConsensus consensus =
+ (RelayNetworkStatusConsensus) descriptor;
+ SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
+ "Wmg,Wmm,Wme,Wmd".split(",")));
+ weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
+ if (!weightKeys.isEmpty()) {
+ System.err.println("Consensus with valid-after time "
+ + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
+ + " doesn't contain expected Wmx weights. Skipping.");
+ continue;
+ }
+ double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
+ / 10000.0;
+ double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
+ / 10000.0;
+ double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
+ / 10000.0;
+ double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
+ / 10000.0;
+ SortedSet<String> hsDirs = new TreeSet<String>(
+ Collections.reverseOrder());
+ double totalWeightsRendezvousPoint = 0.0;
+ SortedMap<String, Double> weightsRendezvousPoint =
+ new TreeMap<String, Double>();
+ for (Map.Entry<String, NetworkStatusEntry> e :
+ consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ SortedSet<String> flags = statusEntry.getFlags();
+ if (flags.contains("HSDir")) {
+ hsDirs.add(statusEntry.getFingerprint());
+ }
+ double weightRendezvousPoint = 0.0;
+ if (flags.contains("Fast")) {
+ weightRendezvousPoint = (double) statusEntry.getBandwidth();
+ if (flags.contains("Guard") && flags.contains("Exit")) {
+ weightRendezvousPoint *= wmd;
+ } else if (flags.contains("Guard")) {
+ weightRendezvousPoint *= wmg;
+ } else if (flags.contains("Exit")) {
+ weightRendezvousPoint *= wme;
+ } else {
+ weightRendezvousPoint *= wmm;
+ }
+ }
+ weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+ totalWeightsRendezvousPoint += weightRendezvousPoint;
+ }
+ /* Add all HSDir fingerprints with leading "0" and "1" to
+ * simplify the logic for wrapping around the start of the ring. */
+ SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+ hsDirs.clear();
+ for (String fingerprint : hsDirsCopy) {
+ hsDirs.add("0" + fingerprint);
+ hsDirs.add("1" + fingerprint);
+ }
+ final double RING_SIZE = new BigInteger(
+ "10000000000000000000000000000000000000000",
+ 16).doubleValue();
+ for (String fingerprint : fingerprints) {
+ double probabilityRendezvousPoint = 0.0,
+ fractionDescriptors = 0.0;
+ NetworkStatusEntry statusEntry =
+ consensus.getStatusEntry(fingerprint);
+ if (statusEntry != null) {
+ if (hsDirs.contains("1" + fingerprint)) {
+ String startResponsible = fingerprint;
+ int positionsToGo = 3;
+ for (String hsDirFingerprint :
+ hsDirs.tailSet("1" + fingerprint)) {
+ startResponsible = hsDirFingerprint;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+ fractionDescriptors =
+ new BigInteger("1" + fingerprint, 16).subtract(
+ new BigInteger(startResponsible, 16)).doubleValue()
+ / RING_SIZE;
+ fractionDescriptors /= 3.0;
+ }
+ probabilityRendezvousPoint =
+ weightsRendezvousPoint.get(fingerprint)
+ / totalWeightsRendezvousPoint;
+ }
+ if (!extractedConsensusFractions.containsKey(fingerprint)) {
+ extractedConsensusFractions.put(fingerprint,
+ new TreeSet<ConsensusFraction>());
+ }
+ extractedConsensusFractions.get(fingerprint).add(
+ new ConsensusFraction(consensus.getValidAfterMillis(),
+ consensus.getFreshUntilMillis(), probabilityRendezvousPoint,
+ fractionDescriptors));
+ }
+ }
+ }
+ return extractedConsensusFractions;
+ }
+
+ private static void extrapolateHidServStats(
+ SortedMap<String, SortedSet<HidServStats>> hidServStats,
+ SortedMap<String, SortedSet<ConsensusFraction>>
+ consensusFractions) throws Exception {
+ hidservStatsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(
+ new FileWriter(hidservStatsCsvFile));
+ bw.write("fingerprint,stats_start,stats_end,"
+ + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
+ + "frac_rend_relayed_cells,frac_dir_onions_seen\n");
+ for (Map.Entry<String, SortedSet<HidServStats>> e :
+ hidServStats.entrySet()) {
+ String fingerprint = e.getKey();
+ if (!consensusFractions.containsKey(fingerprint)) {
+ System.err.println("We have hidserv-stats but no consensus "
+ + "fractions for " + fingerprint + ". Skipping.");
+ continue;
+ }
+ for (HidServStats stats : e.getValue()) {
+ long statsStartMillis = stats.statsEndMillis
+ - stats.statsIntervalSeconds * 1000L;
+ double sumProbabilityRendezvousPoint = 0.0,
+ sumResponsibleDescriptors = 0.0;
+ int statusEntries = 0;
+ for (ConsensusFraction frac :
+ consensusFractions.get(fingerprint)) {
+ if (statsStartMillis <= frac.validAfterMillis &&
+ frac.validAfterMillis < stats.statsEndMillis) {
+ sumProbabilityRendezvousPoint +=
+ frac.probabilityRendezvousPoint;
+ sumResponsibleDescriptors +=
+ frac.fractionResponsibleDescriptors;
+ statusEntries++;
+ }
+ }
+ double fracCells = sumProbabilityRendezvousPoint / statusEntries,
+ fracDescs = sumResponsibleDescriptors / statusEntries;
+ bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
+ DATE_TIME_FORMAT.format(statsStartMillis),
+ DATE_TIME_FORMAT.format(stats.statsEndMillis),
+ stats.rendRelayedCells, stats.dirOnionsSeen, fracCells,
+ fracDescs));
+ }
+ }
+ bw.close();
+ }
+}
+
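The removeNoise() method above inverts the obfuscation applied by reporting relays, which bin their counts up to the next multiple of bin_size and then add Laplace noise: rounding the noisy report to the nearest right side of a bin and subtracting half a bin recovers the bin midpoint as the best guess. A toy round trip with a fixed stand-in for the random noise term (all values hypothetical):

    // Round trip through binning plus noise and removeNoise()'s inverse.
    public class ToyRemoveNoise {
      public static void main(String[] args) {
        final long binSize = 1024L;
        long observed = 1500L;               /* made-up true count */
        long afterBinning =
            ((observed + binSize - 1L) / binSize) * binSize;   /* 2048 */
        long reported = afterBinning + 137L; /* stand-in for Laplace noise */
        long roundedToNearestRightSideOfTheBin =
            ((reported + binSize / 2) / binSize) * binSize;
        long removedNoise =
            roundedToNearestRightSideOfTheBin - binSize / 2;
        System.out.println(observed + " -> " + reported + " -> "
            + removedNoise);         /* prints 1500 -> 2185 -> 1536 */
      }
    }

As long as the noise does not exceed half a bin, the recovered value lands in the right bin; here 1536 is the midpoint of the bin (1024, 2048] that contained the true count.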
diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java
deleted file mode 100644
index 100520d..0000000
--- a/task-13192/src/java/ExtrapolateHidServStats.java
+++ /dev/null
@@ -1,722 +0,0 @@
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileWriter;
-import java.math.BigInteger;
-import java.text.DateFormat;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Scanner;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TimeZone;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import org.torproject.descriptor.Descriptor;
-import org.torproject.descriptor.DescriptorFile;
-import org.torproject.descriptor.DescriptorReader;
-import org.torproject.descriptor.DescriptorSourceFactory;
-import org.torproject.descriptor.ExtraInfoDescriptor;
-import org.torproject.descriptor.NetworkStatusEntry;
-import org.torproject.descriptor.RelayNetworkStatusConsensus;
-
-public class ExtrapolateHidServStats {
-
- private static File archiveExtraInfosDirectory =
- new File("in/collector/archive/relay-descriptors/extra-infos/");
-
- private static File recentExtraInfosDirectory =
- new File("in/collector/recent/relay-descriptors/extra-infos/");
-
- private static File archiveConsensuses =
- new File("in/collector/archive/relay-descriptors/consensuses/");
-
- private static File recentConsensuses =
- new File("in/collector/recent/relay-descriptors/consensuses/");
-
- private static File hidservStatsCsvFile =
- new File("out/csv/hidserv-stats.csv");
-
- private static File simCellsCsvFile =
- new File("out/csv/sim-cells.csv");
-
- private static File simOnionsCsvFile =
- new File("out/csv/sim-onions.csv");
-
- public static void main(String[] args) throws Exception {
- System.out.println("Extracting hidserv-* lines from extra-info "
- + "descriptors...");
- SortedMap<String, SortedSet<HidServStats>> hidServStats =
- extractHidServStats();
- System.out.println("Extracting fractions from consensuses...");
- SortedMap<String, SortedSet<ConsensusFraction>> consensusFractions =
- extractConsensusFractions(hidServStats.keySet());
- System.out.println("Extrapolating statistics...");
- extrapolateHidServStats(hidServStats, consensusFractions);
- System.out.println("Simulating extrapolation of rendezvous cells...");
- simulateCells();
- System.out.println("Simulating extrapolation of .onions...");
- simulateOnions();
- System.out.println("Terminating.");
- }
-
- private static final DateFormat DATE_TIME_FORMAT;
-
- static {
- DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
- DATE_TIME_FORMAT.setLenient(false);
- DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- private static class HidServStats implements Comparable<HidServStats> {
-
- /* Hidden-service statistics end timestamp in milliseconds. */
- private long statsEndMillis;
-
- /* Statistics interval length in seconds. */
- private long statsIntervalSeconds;
-
- /* Number of relayed cells reported by the relay and adjusted by
- * rounding to the nearest right side of a bin and subtracting half of
- * the bin size. */
- private long rendRelayedCells;
-
- /* Number of .onions reported by the relay and adjusted by rounding to
- * the nearest right side of a bin and subtracting half of the bin
- * size. */
- private long dirOnionsSeen;
-
- private HidServStats(long statsEndMillis, long statsIntervalSeconds,
- long rendRelayedCells, long dirOnionsSeen) {
- this.statsEndMillis = statsEndMillis;
- this.statsIntervalSeconds = statsIntervalSeconds;
- this.rendRelayedCells = rendRelayedCells;
- this.dirOnionsSeen = dirOnionsSeen;
- }
-
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof HidServStats)) {
- return false;
- }
- HidServStats other = (HidServStats) otherObject;
- return this.statsEndMillis == other.statsEndMillis &&
- this.statsIntervalSeconds == other.statsIntervalSeconds &&
- this.rendRelayedCells == other.rendRelayedCells &&
- this.dirOnionsSeen == other.dirOnionsSeen;
- }
-
- @Override
- public int compareTo(HidServStats other) {
- return this.statsEndMillis < other.statsEndMillis ? -1 :
- this.statsEndMillis > other.statsEndMillis ? 1 : 0;
- }
- }
-
- /* Extract fingerprint and hidserv-* lines from extra-info descriptors
- * located in in/{archive,recent}/relay-descriptors/extra-infos/. */
- private static SortedMap<String, SortedSet<HidServStats>>
- extractHidServStats() {
- SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
- new TreeMap<String, SortedSet<HidServStats>>();
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(archiveExtraInfosDirectory);
- descriptorReader.addDirectory(recentExtraInfosDirectory);
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (!(descriptor instanceof ExtraInfoDescriptor)) {
- continue;
- }
- String fingerprint =
- ((ExtraInfoDescriptor) descriptor).getFingerprint();
- Scanner scanner = new Scanner(new ByteArrayInputStream(
- descriptor.getRawDescriptorBytes()));
- Long statsEndMillis = null, statsIntervalSeconds = null,
- rendRelayedCells = null, dirOnionsSeen = null;
- try {
- while (scanner.hasNext()) {
- String line = scanner.nextLine();
- if (line.startsWith("hidserv-")) {
- String[] parts = line.split(" ");
- if (parts[0].equals("hidserv-stats-end")) {
- if (parts.length != 5 || !parts[3].startsWith("(") ||
- !parts[4].equals("s)")) {
- /* Will warn below, because statsEndMillis and
- * statsIntervalSeconds are still null. */
- continue;
- }
- statsEndMillis = DATE_TIME_FORMAT.parse(
- parts[1] + " " + parts[2]).getTime();
- statsIntervalSeconds =
- Long.parseLong(parts[3].substring(1));
- } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
- if (parts.length != 5 ||
- !parts[4].startsWith("bin_size=")) {
- /* Will warn below, because rendRelayedCells is still
- * null. */
- continue;
- }
- rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
- Long.parseLong(parts[4].substring(9)));
- } else if (parts[0].equals("hidserv-dir-onions-seen")) {
- if (parts.length != 5 ||
- !parts[4].startsWith("bin_size=")) {
- /* Will warn below, because dirOnionsSeen is still
- * null. */
- continue;
- }
- dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
- Long.parseLong(parts[4].substring(9)));
- }
- }
- }
- } catch (ParseException e) {
- e.printStackTrace();
- continue;
- } catch (NumberFormatException e) {
- e.printStackTrace();
- continue;
- }
- if (statsEndMillis == null && statsIntervalSeconds == null &&
- rendRelayedCells == null && dirOnionsSeen == null) {
- continue;
- } else if (statsEndMillis != null && statsIntervalSeconds != null
- && rendRelayedCells != null && dirOnionsSeen != null) {
- if (!extractedHidServStats.containsKey(fingerprint)) {
- extractedHidServStats.put(fingerprint,
- new TreeSet<HidServStats>());
- }
- extractedHidServStats.get(fingerprint).add(new HidServStats(
- statsEndMillis, statsIntervalSeconds, rendRelayedCells,
- dirOnionsSeen));
- } else {
- System.err.println("Relay " + fingerprint + " published "
- + "incomplete hidserv-stats. Ignoring.");
- }
- }
- }
- return extractedHidServStats;
- }
-
- private static long removeNoise(long reportedNumber, long binSize) {
- long roundedToNearestRightSideOfTheBin =
- ((reportedNumber + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- return subtractedHalfOfBinSize;
- }
-
- private static class ConsensusFraction
- implements Comparable<ConsensusFraction> {
-
- /* Valid-after timestamp of the consensus in milliseconds. */
- private long validAfterMillis;
-
- /* Fresh-until timestamp of the consensus in milliseconds. */
- private long freshUntilMillis;
-
- /* Fraction of consensus weight in [0.0, 1.0] of this relay. */
- private double fractionConsensusWeight;
-
- /* Probability for being selected by clients as rendezvous point. */
- private double probabilityRendezvousPoint;
-
- /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay
- * has been responsible for. This is the "distance" from the
- * fingerprint of the relay three HSDir positions earlier in the ring
- * to the fingerprint of this relay. Fractions of all HSDirs in a
- * consensus add up to 3.0, not 1.0. */
- private double fractionResponsibleDescriptors;
-
- private ConsensusFraction(long validAfterMillis,
- long freshUntilMillis,
- double fractionConsensusWeight,
- double probabilityRendezvousPoint,
- double fractionResponsibleDescriptors) {
- this.validAfterMillis = validAfterMillis;
- this.freshUntilMillis = freshUntilMillis;
- this.fractionConsensusWeight = fractionConsensusWeight;
- this.probabilityRendezvousPoint = probabilityRendezvousPoint;
- this.fractionResponsibleDescriptors =
- fractionResponsibleDescriptors;
- }
-
- @Override
- public boolean equals(Object otherObject) {
- if (!(otherObject instanceof ConsensusFraction)) {
- return false;
- }
- ConsensusFraction other = (ConsensusFraction) otherObject;
- return this.validAfterMillis == other.validAfterMillis &&
- this.freshUntilMillis == other.freshUntilMillis &&
- this.fractionResponsibleDescriptors ==
- other.fractionResponsibleDescriptors &&
- this.fractionConsensusWeight == other.fractionConsensusWeight &&
- this.probabilityRendezvousPoint ==
- other.probabilityRendezvousPoint;
- }
-
- @Override
- public int compareTo(ConsensusFraction other) {
- return this.validAfterMillis < other.validAfterMillis ? -1 :
- this.validAfterMillis > other.validAfterMillis ? 1 : 0;
- }
- }
-
- /* Extract fractions that relays were responsible for from consensuses
- * located in in/{archive,recent}/relay-descriptors/consensuses/. */
- private static SortedMap<String, SortedSet<ConsensusFraction>>
- extractConsensusFractions(Collection<String> fingerprints) {
- SortedMap<String, SortedSet<ConsensusFraction>>
- extractedConsensusFractions =
- new TreeMap<String, SortedSet<ConsensusFraction>>();
- DescriptorReader descriptorReader =
- DescriptorSourceFactory.createDescriptorReader();
- descriptorReader.addDirectory(archiveConsensuses);
- descriptorReader.addDirectory(recentConsensuses);
- Iterator<DescriptorFile> descriptorFiles =
- descriptorReader.readDescriptors();
- while (descriptorFiles.hasNext()) {
- DescriptorFile descriptorFile = descriptorFiles.next();
- for (Descriptor descriptor : descriptorFile.getDescriptors()) {
- if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
- continue;
- }
- RelayNetworkStatusConsensus consensus =
- (RelayNetworkStatusConsensus) descriptor;
- SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
- "Wmg,Wmm,Wme,Wmd".split(",")));
- weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
- if (!weightKeys.isEmpty()) {
- System.err.println("Consensus with valid-after time "
- + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
- + " doesn't contain expected Wmx weights. Skipping.");
- continue;
- }
- double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
- / 10000.0;
- double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
- / 10000.0;
- double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
- / 10000.0;
- double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
- / 10000.0;
- SortedSet<String> hsDirs = new TreeSet<String>(
- Collections.reverseOrder());
- long totalConsensusWeight = 0L;
- double totalWeightsRendezvousPoint = 0.0;
- SortedMap<String, Double> weightsRendezvousPoint =
- new TreeMap<String, Double>();
- for (Map.Entry<String, NetworkStatusEntry> e :
- consensus.getStatusEntries().entrySet()) {
- String fingerprint = e.getKey();
- NetworkStatusEntry statusEntry = e.getValue();
- SortedSet<String> flags = statusEntry.getFlags();
- if (flags.contains("HSDir")) {
- hsDirs.add(statusEntry.getFingerprint());
- }
- totalConsensusWeight += statusEntry.getBandwidth();
- double weightRendezvousPoint = 0.0;
- if (flags.contains("Fast")) {
- weightRendezvousPoint = (double) statusEntry.getBandwidth();
- if (flags.contains("Guard") && flags.contains("Exit")) {
- weightRendezvousPoint *= wmd;
- } else if (flags.contains("Guard")) {
- weightRendezvousPoint *= wmg;
- } else if (flags.contains("Exit")) {
- weightRendezvousPoint *= wme;
- } else {
- weightRendezvousPoint *= wmm;
- }
- }
- weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
- totalWeightsRendezvousPoint += weightRendezvousPoint;
- }
- /* Add all HSDir fingerprints with leading "0" and "1" to
- * simplify the logic to traverse the ring start. */
- SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
- hsDirs.clear();
- for (String fingerprint : hsDirsCopy) {
- hsDirs.add("0" + fingerprint);
- hsDirs.add("1" + fingerprint);
- }
- final double RING_SIZE = new BigInteger(
- "10000000000000000000000000000000000000000",
- 16).doubleValue();
- for (String fingerprint : fingerprints) {
- double probabilityRendezvousPoint = 0.0,
- fractionResponsibleDescriptors = 0.0,
- fractionConsensusWeight = 0.0;
- NetworkStatusEntry statusEntry =
- consensus.getStatusEntry(fingerprint);
- if (statusEntry != null) {
- if (hsDirs.contains("1" + fingerprint)) {
- String startResponsible = fingerprint;
- int positionsToGo = 3;
- for (String hsDirFingerprint :
- hsDirs.tailSet("1" + fingerprint)) {
- startResponsible = hsDirFingerprint;
- if (positionsToGo-- <= 0) {
- break;
- }
- }
- fractionResponsibleDescriptors =
- new BigInteger("1" + fingerprint, 16).subtract(
- new BigInteger(startResponsible, 16)).doubleValue()
- / RING_SIZE;
- }
- fractionConsensusWeight =
- ((double) statusEntry.getBandwidth())
- / ((double) totalConsensusWeight);
- probabilityRendezvousPoint =
- weightsRendezvousPoint.get(fingerprint)
- / totalWeightsRendezvousPoint;
- }
- if (!extractedConsensusFractions.containsKey(fingerprint)) {
- extractedConsensusFractions.put(fingerprint,
- new TreeSet<ConsensusFraction>());
- }
- extractedConsensusFractions.get(fingerprint).add(
- new ConsensusFraction(consensus.getValidAfterMillis(),
- consensus.getFreshUntilMillis(), fractionConsensusWeight,
- probabilityRendezvousPoint,
- fractionResponsibleDescriptors));
- }
- }
- }
- return extractedConsensusFractions;
- }
-
- private static void extrapolateHidServStats(
- SortedMap<String, SortedSet<HidServStats>> hidServStats,
- SortedMap<String, SortedSet<ConsensusFraction>>
- consensusFractions) throws Exception {
- hidservStatsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(
- new FileWriter(hidservStatsCsvFile));
- bw.write("fingerprint,stats_start,stats_end,"
- + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
- + "prob_rend_point,frac_hsdesc\n");
- for (Map.Entry<String, SortedSet<HidServStats>> e :
- hidServStats.entrySet()) {
- String fingerprint = e.getKey();
- if (!consensusFractions.containsKey(fingerprint)) {
- System.err.println("We have hidserv-stats but no consensus "
- + "fractions for " + fingerprint + ". Skipping.");
- continue;
- }
- for (HidServStats stats : e.getValue()) {
- long statsStartMillis = stats.statsEndMillis
- - stats.statsIntervalSeconds * 1000L;
- double sumProbabilityRendezvousPoint = 0.0,
- sumResponsibleDescriptors = 0.0;
- int statusEntries = 0;
- for (ConsensusFraction frac :
- consensusFractions.get(fingerprint)) {
- if (statsStartMillis <= frac.validAfterMillis &&
- frac.validAfterMillis < stats.statsEndMillis) {
- sumProbabilityRendezvousPoint +=
- frac.probabilityRendezvousPoint;
- sumResponsibleDescriptors +=
- frac.fractionResponsibleDescriptors;
- statusEntries++;
- }
- }
- bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
- DATE_TIME_FORMAT.format(statsStartMillis),
- DATE_TIME_FORMAT.format(stats.statsEndMillis),
- stats.rendRelayedCells, stats.dirOnionsSeen,
- sumProbabilityRendezvousPoint / statusEntries,
- sumResponsibleDescriptors / statusEntries));
- }
- }
- bw.close();
- }
-
- private static Random rnd = new Random(3);
-
- private static void simulateCells() throws Exception {
-
- /* Generate consensus weights following an exponential distribution
- * with lambda = 1 for 3000 potential rendezvous points. */
- final int numberRendPoints = 3000;
- double[] consensusWeights = new double[numberRendPoints];
- double totalConsensusWeight = 0.0;
- for (int i = 0; i < numberRendPoints; i++) {
- double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
- consensusWeights[i] = consensusWeight;
- totalConsensusWeight += consensusWeight;
- }
-
- /* Compute probabilities for being selected as rendezvous point. */
- double[] probRendPoint = new double[numberRendPoints];
- for (int i = 0; i < numberRendPoints; i++) {
- probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
- }
-
- /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks
- * following an exponential distribution with lambda = 0.00001 and
- * randomly assign them to a rendezvous point to report them later. */
- long cellsLeft = 10000000000L;
- final double cellsLambda = 0.00001;
- long[] observedCells = new long[numberRendPoints];
- while (cellsLeft > 0) {
- long cells = (long) (-Math.log(1.0 - rnd.nextDouble())
- / cellsLambda);
- double selectRendPoint = rnd.nextDouble();
- for (int i = 0; i < probRendPoint.length; i++) {
- selectRendPoint -= probRendPoint[i];
- if (selectRendPoint <= 0.0) {
- observedCells[i] += cells;
- break;
- }
- }
- cellsLeft -= cells;
- }
-
- /* Obfuscate reports using binning and Laplace noise, and then attempt
- * to remove noise again. */
- final long binSize = 1024L;
- final double b = 2048.0 / 0.3;
- long[] reportedCells = new long[numberRendPoints];
- long[] removedNoiseCells = new long[numberRendPoints];
- for (int i = 0; i < numberRendPoints; i++) {
- long observed = observedCells[i];
- long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
- double p = rnd.nextDouble();
- double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
- Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
- long reported = afterBinning + (long) laplaceNoise;
- reportedCells[i] = reported;
- long roundedToNearestRightSideOfTheBin =
- ((reported + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- removedNoiseCells[i] = subtractedHalfOfBinSize;
- }
-
- /* Perform 10,000 extrapolations from random fractions of reports by
- * probability to be selected as rendezvous point. */
- simCellsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- simCellsCsvFile));
- bw.write("frac,p025,p500,p975\n");
- double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
- 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
- final int numberOfExtrapolations = 10000;
- for (double fraction : fractions) {
- List<Long> extrapolations = new ArrayList<Long>();
- for (int i = 0; i < numberOfExtrapolations; i++) {
- SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
- for (int j = 0; j < numberRendPoints; j++) {
- nonReportingRelays.add(j);
- }
- List<Integer> shuffledRelays = new ArrayList<Integer>(
- nonReportingRelays);
- Collections.shuffle(shuffledRelays);
- SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
- for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
- j++) {
- reportingRelays.add(shuffledRelays.get(j));
- nonReportingRelays.remove(shuffledRelays.get(j));
- }
- double reportingProbability;
- long totalReports;
- do {
- reportingProbability = 0.0;
- totalReports = 0L;
- for (int reportingRelay : reportingRelays) {
- reportingProbability += probRendPoint[reportingRelay];
- totalReports += removedNoiseCells[reportingRelay];
- }
- if (reportingProbability < fraction - 0.001) {
- int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
- rnd.nextInt(nonReportingRelays.size()));
- nonReportingRelays.remove(addRelay);
- reportingRelays.add(addRelay);
- } else if (reportingProbability > fraction + 0.001) {
- int removeRelay = new ArrayList<Integer>(reportingRelays).get(
- rnd.nextInt(reportingRelays.size()));
- reportingRelays.remove(removeRelay);
- nonReportingRelays.add(removeRelay);
- }
- } while (reportingProbability < fraction - 0.001 ||
- reportingProbability > fraction + 0.001);
- extrapolations.add((long) ((double) totalReports
- / reportingProbability));
- }
- Collections.sort(extrapolations);
- long p025 = extrapolations.get((extrapolations.size() * 25) / 1000),
- p500 = extrapolations.get((extrapolations.size() * 500) / 1000),
- p975 = extrapolations.get((extrapolations.size() * 975) / 1000);
- bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500,
- p975));
- }
- bw.close();
- }
-
- private static void simulateOnions() throws Exception {
-
- /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
- final int numberHsDirs = 3000;
- SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
- for (int i = 0; i < numberHsDirs; i++) {
- hsDirFingerprints.add(rnd.nextDouble());
- }
-
- /* Compute fractions of observed descriptor space. */
- SortedSet<Double> ring =
- new TreeSet<Double>(Collections.reverseOrder());
- for (double fingerprint : hsDirFingerprints) {
- ring.add(fingerprint);
- ring.add(fingerprint - 1.0);
- }
- SortedMap<Double, Double> hsDirFractions =
- new TreeMap<Double, Double>();
- for (double fingerprint : hsDirFingerprints) {
- double start = fingerprint;
- int positionsToGo = 3;
- for (double prev : ring.tailSet(fingerprint)) {
- start = prev;
- if (positionsToGo-- <= 0) {
- break;
- }
- }
- hsDirFractions.put(fingerprint, fingerprint - start);
- }
-
- /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
- final int numberOnions = 40000;
- final int replicas = 4;
- final int storeOnDirs = 3;
- SortedMap<Double, SortedSet<Integer>> storedDescs =
- new TreeMap<Double, SortedSet<Integer>>();
- for (double fingerprint : hsDirFingerprints) {
- storedDescs.put(fingerprint, new TreeSet<Integer>());
- }
- for (int i = 0; i < numberOnions; i++) {
- for (int j = 0; j < replicas; j++) {
- int leftToStore = storeOnDirs;
- for (double fingerprint :
- hsDirFingerprints.tailSet(rnd.nextDouble())) {
- storedDescs.get(fingerprint).add(i);
- if (--leftToStore <= 0) {
- break;
- }
- }
- if (leftToStore > 0) {
- for (double fingerprint : hsDirFingerprints) {
- storedDescs.get(fingerprint).add(i);
- if (--leftToStore <= 0) {
- break;
- }
- }
- }
- }
- }
-
- /* Obfuscate reports using binning and Laplace noise, and then attempt
- * to remove noise again. */
- final long binSize = 8L;
- final double b = 8.0 / 0.3;
- SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
- removedNoiseOnions = new TreeMap<Double, Long>();
- for (Map.Entry<Double, SortedSet<Integer>> e :
- storedDescs.entrySet()) {
- double fingerprint = e.getKey();
- long observed = (long) e.getValue().size();
- long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
- double p = rnd.nextDouble();
- double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
- Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
- long reported = afterBinning + (long) laplaceNoise;
- reportedOnions.put(fingerprint, reported);
- long roundedToNearestRightSideOfTheBin =
- ((reported + binSize / 2) / binSize) * binSize;
- long subtractedHalfOfBinSize =
- roundedToNearestRightSideOfTheBin - binSize / 2;
- removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
- }
-
-    /* Perform 10,000 extrapolations from random subsets of reports,
-     * where each subset covers a given fraction of the descriptor
-     * space observed by the reporting HSDirs. */
- simOnionsCsvFile.getParentFile().mkdirs();
- BufferedWriter bw = new BufferedWriter(new FileWriter(
- simOnionsCsvFile));
- bw.write("frac,p025,p500,p975\n");
- double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
- 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
- final int numberOfExtrapolations = 10000;
- for (double fraction : fractions) {
- List<Long> extrapolationsTwo = new ArrayList<Long>();
- for (int i = 0; i < numberOfExtrapolations; i++) {
- SortedSet<Double> nonReportingRelays =
- new TreeSet<Double>(hsDirFractions.keySet());
- List<Double> shuffledRelays = new ArrayList<Double>(
- nonReportingRelays);
- Collections.shuffle(shuffledRelays);
- SortedSet<Double> reportingRelays = new TreeSet<Double>();
- for (int j = 0; j < (int) ((double) hsDirFractions.size()
- * fraction); j++) {
- reportingRelays.add(shuffledRelays.get(j));
- nonReportingRelays.remove(shuffledRelays.get(j));
- }
- double reportingProbability;
- long totalReports;
- do {
- reportingProbability = 0.0;
- totalReports = 0L;
- for (double reportingRelay : reportingRelays) {
- reportingProbability += hsDirFractions.get(reportingRelay)
- / 3.0;
- totalReports += removedNoiseOnions.get(reportingRelay);
- }
- if (reportingProbability < fraction - 0.001) {
- double addRelay =
- new ArrayList<Double>(nonReportingRelays).get(
- rnd.nextInt(nonReportingRelays.size()));
- nonReportingRelays.remove(addRelay);
- reportingRelays.add(addRelay);
- } else if (reportingProbability > fraction + 0.001) {
- double removeRelay =
- new ArrayList<Double>(reportingRelays).get(
- rnd.nextInt(reportingRelays.size()));
- reportingRelays.remove(removeRelay);
- nonReportingRelays.add(removeRelay);
- }
- } while (reportingProbability < fraction - 0.001 ||
- reportingProbability > fraction + 0.001);
- double totalFraction = 0.0;
- for (double fingerprint : reportingRelays) {
- totalFraction += hsDirFractions.get(fingerprint) * 4.0;
- }
- extrapolationsTwo.add((long) ((double) totalReports
- / totalFraction));
- }
- Collections.sort(extrapolationsTwo);
- long pTwo025 = extrapolationsTwo.get(
- (extrapolationsTwo.size() * 25) / 1000),
- pTwo500 = extrapolationsTwo.get(
- (extrapolationsTwo.size() * 500) / 1000),
- pTwo975 = extrapolationsTwo.get(
- (extrapolationsTwo.size() * 975) / 1000);
- bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025,
- pTwo500, pTwo975));
- }
- bw.close();
- }
-}
-
diff --git a/task-13192/src/java/Simulate.java b/task-13192/src/java/Simulate.java
new file mode 100644
index 0000000..e41e02c
--- /dev/null
+++ b/task-13192/src/java/Simulate.java
@@ -0,0 +1,357 @@
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+public class Simulate {
+ private static File simCellsCsvFile =
+ new File("out/csv/sim-cells.csv");
+
+ private static File simOnionsCsvFile =
+ new File("out/csv/sim-onions.csv");
+
+ public static void main(String[] args) throws Exception {
+ System.out.print("Simulating extrapolation of rendezvous cells");
+ simulateManyCells();
+ System.out.print("\nSimulating extrapolation of .onions");
+ simulateManyOnions();
+ System.out.println("\nTerminating.");
+ }
+
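+  /* Pseudo-random source; note that, unlike the removed
+   * ExtrapolateHidServStats code, which used a fixed seed
+   * (new Random(3)) for reproducible runs, this one is unseeded, so
+   * each invocation simulates different random networks. */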
+ private static Random rnd = new Random();
+
+ private static void simulateManyCells() throws Exception {
+ simCellsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simCellsCsvFile));
+ bw.write("run,frac,wmean,wmedian,wiqm\n");
+ final int numberOfExtrapolations = 1000;
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ bw.write(simulateCells(i));
+ System.out.print(".");
+ }
+ bw.close();
+ }
+
+ private static void simulateManyOnions() throws Exception {
+ simOnionsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simOnionsCsvFile));
+ bw.write("run,frac,wmean,wmedian,wiqm\n");
+ final int numberOfExtrapolations = 1000;
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ bw.write(simulateOnions(i));
+ System.out.print(".");
+ }
+ bw.close();
+ }
+
+ private static String simulateCells(int run) {
+
+ /* Generate consensus weights following an exponential distribution
+ * with lambda = 1 for 3000 potential rendezvous points. */
+ final int numberRendPoints = 3000;
+ double[] consensusWeights = new double[numberRendPoints];
+ double totalConsensusWeight = 0.0;
+ for (int i = 0; i < numberRendPoints; i++) {
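+      /* Inverse-transform sampling: for U ~ Uniform(0, 1),
+       * -ln(1 - U) follows an exponential distribution with
+       * lambda = 1. */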
+ double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+ consensusWeights[i] = consensusWeight;
+ totalConsensusWeight += consensusWeight;
+ }
+
+ /* Compute probabilities for being selected as rendezvous point. */
+ double[] probRendPoint = new double[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+ }
+
+ /* Generate 10,000,000,000 cells (474 Mbit/s) in chunks following an
+ * exponential distribution with lambda = 0.0001, so on average
+ * 10,000 cells per chunk, and randomly assign them to a rendezvous
+ * point to report them later. */
+ long cellsLeft = 10000000000L;
+ final double cellsLambda = 0.0001;
+ long[] observedCells = new long[numberRendPoints];
+ while (cellsLeft > 0) {
+ long cells = Math.min(cellsLeft,
+ (long) (-Math.log(1.0 - rnd.nextDouble()) / cellsLambda));
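+      /* Math.min clamps the final chunk so that exactly 10 billion
+       * cells are generated in total.  The whole chunk is assigned to
+       * a single rendezvous point by roulette-wheel selection on the
+       * cumulative selection probabilities. */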
+ double selectRendPoint = rnd.nextDouble();
+ for (int i = 0; i < probRendPoint.length; i++) {
+ selectRendPoint -= probRendPoint[i];
+ if (selectRendPoint <= 0.0) {
+ observedCells[i] += cells;
+ break;
+ }
+ }
+ cellsLeft -= cells;
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 1024L;
+ final double b = 2048.0 / 0.3;
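+    /* The Laplace scale b is the sensitivity divided by epsilon; the
+     * values here (delta_f = 2048, epsilon = 0.3) match the binning
+     * and noise parameters of Tor's hidserv-stats reports. */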
+ long[] reportedCells = new long[numberRendPoints];
+ long[] removedNoiseCells = new long[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ long observed = observedCells[i];
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double p = rnd.nextDouble();
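+      /* Sample Laplace(0, b) noise by inverting its CDF: for
+       * p ~ Uniform(0, 1), -b * sgn(p - 0.5) * ln(1 - 2 * |p - 0.5|)
+       * follows a Laplace distribution with scale b. */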
+ double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+ Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedCells[i] = reported;
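+      /* Noise removal: rounding the reported value to the nearest
+       * multiple of binSize recovers the bin's right edge whenever
+       * |noise| < binSize / 2; subtracting binSize / 2 then yields the
+       * bin midpoint as the estimate of the observed value. */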
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseCells[i] = subtractedHalfOfBinSize;
+ }
+
+    /* Perform extrapolations from random subsets of reports, where
+     * each subset covers a given fraction of the total probability of
+     * being selected as rendezvous point. */
+ StringBuilder sb = new StringBuilder();
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ for (double fraction : fractions) {
+ SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+ for (int j = 0; j < numberRendPoints; j++) {
+ nonReportingRelays.add(j);
+ }
+ List<Integer> shuffledRelays = new ArrayList<Integer>(
+ nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+ for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+ j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ List<double[]> singleRelayExtrapolations;
+ double totalReportingProbability;
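+      /* A randomly drawn subset rarely covers exactly the target
+       * fraction of selection probability, so add or remove random
+       * relays until the covered probability is within +/- 0.001 of
+       * the target. */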
+ do {
+ singleRelayExtrapolations = new ArrayList<double[]>();
+ totalReportingProbability = 0.0;
+ for (int reportingRelay : reportingRelays) {
+ double probability = probRendPoint[reportingRelay];
+ if (probability > 0.0) {
+ singleRelayExtrapolations.add(
+ new double[] {
+ removedNoiseCells[reportingRelay] / probability,
+ removedNoiseCells[reportingRelay],
+ probability });
+ }
+ totalReportingProbability += probability;
+ }
+ if (totalReportingProbability < fraction - 0.001) {
+ int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (totalReportingProbability > fraction + 0.001) {
+ int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (totalReportingProbability < fraction - 0.001 ||
+ totalReportingProbability > fraction + 0.001);
+ Collections.sort(singleRelayExtrapolations,
+ new Comparator<double[]>() {
+ public int compare(double[] o1, double[] o2) {
+ return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+ }
+ });
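+      /* Walk the single-relay extrapolations in ascending order and
+       * compute three estimators: the weighted mean (total reports
+       * divided by total probability), the weighted median (the value
+       * at half the cumulative probability), and the weighted
+       * interquartile mean, which averages only reports between the
+       * 25th and 75th cumulative-probability percentiles. */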
+ double totalProbability = 0.0, totalValues = 0.0;
+ double totalInterquartileProbability = 0.0,
+ totalInterquartileValues = 0.0;
+ Double weightedMedian = null;
+ for (double[] extrapolation : singleRelayExtrapolations) {
+ totalValues += extrapolation[1];
+ totalProbability += extrapolation[2];
+ if (weightedMedian == null &&
+ totalProbability > totalReportingProbability * 0.5) {
+ weightedMedian = extrapolation[0];
+ }
+ if (totalProbability > totalReportingProbability * 0.25 &&
+ totalProbability < totalReportingProbability * 0.75) {
+ totalInterquartileValues += extrapolation[1];
+ totalInterquartileProbability += extrapolation[2];
+ }
+ }
+ sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+ totalValues / totalProbability, weightedMedian,
+ totalInterquartileValues / totalInterquartileProbability));
+ }
+ return sb.toString();
+ }
+
+ private static String simulateOnions(final int run) {
+
+ /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+ final int numberHsDirs = 3000;
+ SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+ for (int i = 0; i < numberHsDirs; i++) {
+ hsDirFingerprints.add(rnd.nextDouble());
+ }
+
+ /* Compute fractions of observed descriptor space. */
+ SortedSet<Double> ring =
+ new TreeSet<Double>(Collections.reverseOrder());
+ for (double fingerprint : hsDirFingerprints) {
+ ring.add(fingerprint);
+ ring.add(fingerprint - 1.0);
+ }
+ SortedMap<Double, Double> hsDirFractions =
+ new TreeMap<Double, Double>();
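+    /* Each HSDir observes the part of the ring between its own
+     * fingerprint and the fingerprint three positions before it,
+     * because a descriptor is stored on the three HSDirs following its
+     * ID.  The ring contains every fingerprint twice (once shifted by
+     * -1.0) to handle wrap-around at 0.0. */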
+ for (double fingerprint : hsDirFingerprints) {
+ double start = fingerprint;
+ int positionsToGo = 3;
+ for (double prev : ring.tailSet(fingerprint)) {
+ start = prev;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+ hsDirFractions.put(fingerprint, fingerprint - start);
+ }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs each, and store every
+     * ID on the 3 HSDirs following it on the ring. */
+ final int numberOnions = 40000;
+ final int replicas = 4;
+ final int storeOnDirs = 3;
+ SortedMap<Double, SortedSet<Integer>> storedDescs =
+ new TreeMap<Double, SortedSet<Integer>>();
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.put(fingerprint, new TreeSet<Integer>());
+ }
+ for (int i = 0; i < numberOnions; i++) {
+ for (int j = 0; j < replicas; j++) {
+ int leftToStore = storeOnDirs;
+ for (double fingerprint :
+ hsDirFingerprints.tailSet(rnd.nextDouble())) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
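+        /* Wrap around to the start of the ring if fewer than three
+         * HSDirs follow the descriptor ID. */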
+ if (leftToStore > 0) {
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 8L;
+ final double b = 8.0 / 0.3;
+ SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
+ removedNoiseOnions = new TreeMap<Double, Long>();
+ for (Map.Entry<Double, SortedSet<Integer>> e :
+ storedDescs.entrySet()) {
+ double fingerprint = e.getKey();
+ long observed = (long) e.getValue().size();
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double p = rnd.nextDouble();
+ double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+ Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedOnions.put(fingerprint, reported);
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+ }
+
+    /* Perform extrapolations from random subsets of reports, where
+     * each subset covers a given fraction of the descriptor space
+     * observed by the reporting HSDirs. */
+ StringBuilder sb = new StringBuilder();
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ for (double fraction : fractions) {
+ SortedSet<Double> nonReportingRelays =
+ new TreeSet<Double>(hsDirFractions.keySet());
+ List<Double> shuffledRelays = new ArrayList<Double>(
+ nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Double> reportingRelays = new TreeSet<Double>();
+ for (int j = 0; j < (int) ((double) hsDirFractions.size()
+ * fraction); j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ List<double[]> singleRelayExtrapolations;
+ double totalReportingProbability;
+ do {
+ singleRelayExtrapolations = new ArrayList<double[]>();
+ totalReportingProbability = 0.0;
+ for (double reportingRelay : reportingRelays) {
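+          /* Every point of the ring is covered by exactly three HSDir
+           * intervals, so the raw fractions sum to 3; dividing by 3
+           * normalizes the total reporting probability to 1. */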
+ double probability = hsDirFractions.get(reportingRelay) / 3.0;
+ if (probability > 0.0) {
+ singleRelayExtrapolations.add(
+ new double[] { removedNoiseOnions.get(reportingRelay)
+ / probability, removedNoiseOnions.get(reportingRelay),
+ probability });
+ }
+ totalReportingProbability += probability;
+ }
+ if (totalReportingProbability < fraction - 0.001) {
+ double addRelay =
+ new ArrayList<Double>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (totalReportingProbability > fraction + 0.001) {
+ double removeRelay =
+ new ArrayList<Double>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (totalReportingProbability < fraction - 0.001 ||
+ totalReportingProbability > fraction + 0.001);
+ Collections.sort(singleRelayExtrapolations,
+ new Comparator<double[]>() {
+ public int compare(double[] o1, double[] o2) {
+ return o1[0] < o2[0] ? -1 : o1[0] > o2[0] ? 1 : 0;
+ }
+ });
+ double totalProbability = 0.0, totalValues = 0.0;
+ double totalInterquartileProbability = 0.0,
+ totalInterquartileValues = 0.0;
+ Double weightedMedian = null;
+ for (double[] extrapolation : singleRelayExtrapolations) {
+ totalValues += extrapolation[1];
+ totalProbability += extrapolation[2];
+ if (weightedMedian == null &&
+ totalProbability > totalReportingProbability * 0.5) {
+ weightedMedian = extrapolation[0];
+ }
+ if (totalProbability > totalReportingProbability * 0.25 &&
+ totalProbability < totalReportingProbability * 0.75) {
+ totalInterquartileValues += extrapolation[1];
+ totalInterquartileProbability += extrapolation[2];
+ }
+ }
+ sb.append(String.format("%d,%.2f,%.0f,%.0f,%.0f%n", run, fraction,
+ totalValues / totalProbability, weightedMedian,
+ totalInterquartileValues / totalInterquartileProbability));
+ }
+ return sb.toString();
+ }
+}
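
For anyone who wants to sanity-check the obfuscation step in isolation,
here is a minimal, self-contained sketch (not part of the commit; the
class name, seed, and example value are made up for illustration) that
applies the same binning and Laplace noise to a single observed count
and then removes the noise again, using the cell parameters from
simulateCells() above:

  import java.util.Random;

  public class ObfuscationDemo {
    public static void main(String[] args) {
      Random rnd = new Random(3); /* fixed seed, for a repeatable demo */
      final long binSize = 1024L; /* bin size for cell counts */
      final double b = 2048.0 / 0.3; /* Laplace scale */
      long observed = 123456L; /* hypothetical observed cell count */
      /* Bin up to the next multiple of binSize. */
      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
      /* Add Laplace(0, b) noise via inverse-CDF sampling. */
      double p = rnd.nextDouble();
      double noise = -b * (p > 0.5 ? 1.0 : -1.0)
          * Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
      long reported = afterBinning + (long) noise;
      /* Remove the noise again: round to the nearest bin edge and
       * subtract half a bin. */
      long edge = ((reported + binSize / 2) / binSize) * binSize;
      long estimate = edge - binSize / 2;
      System.out.printf("observed=%d binned=%d reported=%d estimate=%d%n",
          observed, afterBinning, reported, estimate);
    }
  }

Whenever the sampled noise stays below half a bin in magnitude, the
estimate comes out as the bin midpoint, i.e., within binSize / 2 of the
observed count; larger noise values land in a neighboring bin, which is
one source of the spread visible in the simulation results.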