[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Add hidserv-stats extrapolation code (#13192).
commit 968def62d5872fb23279a35c2474db276ae455e7
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Sat Jan 17 17:53:03 2015 +0100
Add hidserv-stats extrapolation code (#13192).
---
task-13192/.gitignore | 7 +
task-13192/README.md | 24 +
task-13192/src/R/plot.R | 246 ++++++++
task-13192/src/java/ExtrapolateHidServStats.java | 722 ++++++++++++++++++++++
4 files changed, 999 insertions(+)
diff --git a/task-13192/.gitignore b/task-13192/.gitignore
new file mode 100644
index 0000000..7e8bf3b
--- /dev/null
+++ b/task-13192/.gitignore
@@ -0,0 +1,7 @@
+in/
+.classpath
+.project
+src/bash/
+src/bin/
+out/
+
diff --git a/task-13192/README.md b/task-13192/README.md
new file mode 100644
index 0000000..c6ba2c8
--- /dev/null
+++ b/task-13192/README.md
@@ -0,0 +1,24 @@
+Extrapolating network totals from hidden-service statistics
+===========================================================
+
+Fetch (and inflate, but not extract) tarballs and/or fetch single files
+from CollecTor and store them in the following directories:
+
+ in/collector/archive/relay-descriptors/extra-infos/
+ in/collector/archive/relay-descriptors/consensuses/
+ in/collector/recent/relay-descriptors/extra-infos/
+ in/collector/recent/relay-descriptors/consensuses/
+
+Fetch the latest bandwidth.csv file from Metrics and store it at the
+following path:
+
+ in/metrics/bandwidth.csv
+
+Add metrics-lib to the classpath and compile the classes in src/java/.
+
+Run Java class ExtrapolateHidServStats.
+
+Run the R script:
+
+ R --slave -f src/R/plot.R
+
diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R
new file mode 100644
index 0000000..991928b
--- /dev/null
+++ b/task-13192/src/R/plot.R
@@ -0,0 +1,246 @@
+# Load required libraries.  ggplot2, scales (percent) and reshape (melt,
+# cast) are used below, so attach them with library(), which stops with an
+# error if a package is missing.  require() would only return FALSE and
+# let the script fail later with a less helpful message.
+library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
+library(scales, warn.conflicts = FALSE, quietly = TRUE)
+library(reshape, warn.conflicts = FALSE, quietly = TRUE)
+# splines and Hmisc are not called directly by this script; keep loading
+# them softly so that a missing installation is not fatal.
+require(splines, warn.conflicts = FALSE, quietly = TRUE)
+require(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
+
+# Print large numbers in fixed rather than scientific notation.
+options(scipen = 15)
+
+# Load the per-relay statistics written by the Java program.
+h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
+
+# Make sure the output directories for both sets of graphs exist.
+for (graph_dir in c("report", "slides")) {
+  dir.create(file.path("out", "graphs", graph_dir), showWarnings = FALSE,
+    recursive = TRUE)
+}
+
+# Drop the two most recent days, because stats might be incomplete for
+# those.
+stats_end_dates <- as.Date(h$stats_end)
+h <- h[stats_end_dates < max(stats_end_dates) - 1, ]
+
+# Plot how many statistics were reported on each day (one row of h per
+# report).
+h7 <- data.frame(date = as.Date(h$stats_end), reports = 1)
+ggplot(data = h7, mapping = aes(x = date)) +
+  geom_bar(colour = "lightgray", width = 0.7, binwidth = 1) +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "")
+# The same graph is written once for the report and once for the slides.
+ggsave(filename = "out/graphs/report/num-reported-stats.pdf", width = 10,
+  height = 3, dpi = 100)
+ggsave(filename = "out/graphs/slides/hidserv-12.png", width = 8,
+  height = 3, dpi = 100)
+
+# Box plots of the values that single relays reported, one box per day.
+# Cells are converted to the unit named in the facet label (MB/s); the
+# factors presumably stand for 512 bytes per cell and 86400 seconds per
+# reporting interval.
+reported_by_day <- melt(data.frame(date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  services = h$hidserv_dir_onions_seen), id.vars = "date")
+h1 <- data.frame(date = reported_by_day$date,
+  variable = ifelse(reported_by_day$variable == "traffic",
+    "traffic in MB/s", ".onion addresses"),
+  value = reported_by_day$value)
+ggplot(data = h1, mapping = aes(x = date, y = value, group = date)) +
+  geom_boxplot() +
+  facet_grid(variable ~ ., scales = "free_y") +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "Statistics reported by single relays\n")
+ggsave(filename = "out/graphs/report/stats-by-day.pdf", width = 10,
+  height = 5, dpi = 100)
+
+# Box plots of the fractions calculated per relay, one box per day.
+# frac_hsdesc is divided by 3.0 before plotting it as a probability.
+fractions_by_day <- melt(data.frame(date = as.Date(h$stats_end),
+  prob_rend_point = h$prob_rend_point,
+  x_frac_hsdesc = h$frac_hsdesc / 3.0), id.vars = "date")
+h2 <- data.frame(date = fractions_by_day$date,
+  variable = ifelse(fractions_by_day$variable == "prob_rend_point",
+    "selected as rendezvous point", "responsible for a descriptor"),
+  value = fractions_by_day$value)
+ggplot(data = h2, mapping = aes(x = date, y = value, group = date)) +
+  geom_boxplot() +
+  facet_grid(variable ~ ., scales = "free_y") +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "Calculated probabilities\n",
+    labels = percent)
+ggsave(filename = "out/graphs/report/probs-by-relay.pdf", width = 10,
+  height = 5, dpi = 100)
+
+# Graph ECDF of cells reported by relays with rend point probability of 0,
+# together with a Laplace CDF (blue) for comparison.
+h8 <- h[h$prob_rend_point == 0,
+  "hidserv_rend_relayed_cells" ]
+h8 <- sort(h8)
+# seq_along() instead of 1:length(): with no matching relays, 1:0 would
+# produce c(1, 0) and make data.frame() fail on mismatched lengths.
+h8 <- data.frame(x = h8, y = seq_along(h8) / length(h8))
+# CDF of a Laplace distribution centered at 0 with scale 2048 / 0.3.
+laplace_cells <- function(x) {
+  0.5 + 0.5 * sign(x) * (1 - exp(abs(x) / (-2048/0.3)))
+}
+ggplot(h8, aes(x = x, y = y)) +
+  geom_line() +
+  stat_function(fun = laplace_cells, colour = "blue") +
+  scale_x_continuous("\nReported cells on rendezvous circuits") +
+  scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph ECDF of .onions reported by relays with HSDir probability of 0,
+# together with a Laplace CDF (blue) for comparison.
+h9 <- h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"]
+h9 <- sort(h9)
+# seq_along() instead of 1:length(): with no matching relays, 1:0 would
+# produce c(1, 0) and make data.frame() fail on mismatched lengths.
+h9 <- data.frame(x = h9, y = seq_along(h9) / length(h9))
+# CDF of a Laplace distribution centered at 0 with scale 8 / 0.3.
+laplace_onions <- function(x) {
+  0.5 + 0.5 * sign(x) * (1 - exp(abs(x) / (-8/0.3)))
+}
+ggplot(h9, aes(x = x, y = y)) +
+  geom_line() +
+  stat_function(fun = laplace_onions, colour = "blue") +
+  scale_x_continuous("\nReported .onion addresses") +
+  scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Scatter plots of reported values against calculated fractions, one
+# point per report.  Reports with a fraction of 0 get y = NA.
+onions_by_relay <- data.frame(x = h$frac_hsdesc / 3.0,
+  y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen),
+  facet = ".onion addresses")
+traffic_by_relay <- data.frame(x = h$prob_rend_point,
+  y = ifelse(h$prob_rend_point == 0, NA,
+    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)),
+  facet = "traffic in kB/s")
+h3 <- rbind(onions_by_relay, traffic_by_relay)
+
+# .onion addresses against the descriptor fraction.
+ggplot(data = h3[h3$facet == ".onion addresses", ],
+    mapping = aes(x = x, y = y)) +
+  geom_point(alpha = 0.5) +
+  stat_smooth(method = "lm") +
+  scale_x_continuous(name = "\nProbability", labels = percent) +
+  scale_y_continuous(name = "Reported .onion addresses\n")
+ggsave(filename = "out/graphs/report/corr-probs-onions-by-relay.pdf",
+  width = 5, height = 3, dpi = 100)
+
+# Traffic against the rendezvous-point probability.
+ggplot(data = h3[h3$facet == "traffic in kB/s", ],
+    mapping = aes(x = x, y = y)) +
+  geom_point(alpha = 0.5) +
+  stat_smooth(method = "lm") +
+  scale_x_continuous(name = "\nProbability", labels = percent) +
+  scale_y_continuous(name = "Reported traffic in kB/s\n")
+ggsave(filename = "out/graphs/report/corr-probs-cells-by-relay.pdf",
+  width = 5, height = 3, dpi = 100)
+
+# Scatter plots of daily sums of reported values against daily sums of
+# calculated fractions.  Reports with a fraction of 0 are dropped by
+# setting their fraction to NA and calling na.omit().
+onions_per_report <- data.frame(date = as.Date(h$stats_end),
+  prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0),
+  reported = h$hidserv_dir_onions_seen, facet = "published descriptor")
+traffic_per_report <- data.frame(date = as.Date(h$stats_end),
+  prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point),
+  reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  facet = "traffic in MB/s")
+h5 <- na.omit(rbind(onions_per_report, traffic_per_report))
+h5 <- aggregate(list(prob = h5$prob, reported = h5$reported),
+  by = list(date = h5$date, facet = h5$facet), FUN = sum)
+
+# Daily traffic totals; the dashed line marks a total probability of 1%.
+ggplot(data = h5[h5$facet == "traffic in MB/s", ],
+    mapping = aes(x = prob, y = reported)) +
+  geom_point(alpha = 0.5) +
+  scale_x_continuous(name = "\nTotal probability", labels = percent) +
+  scale_y_continuous(name = "Total traffic in MB/s\n") +
+  stat_smooth(method = "lm") +
+  geom_vline(xintercept = 0.01, linetype = 2)
+ggsave(filename = "out/graphs/report/corr-probs-cells-by-day.pdf",
+  width = 5, height = 3, dpi = 100)
+
+# Daily .onion totals; the dashed line marks a total probability of 1%.
+ggplot(data = h5[h5$facet == "published descriptor", ],
+    mapping = aes(x = prob, y = reported)) +
+  geom_point(alpha = 0.5) +
+  scale_x_continuous(name = "\nTotal probability", labels = percent) +
+  scale_y_continuous(name = "Total reported .onion addresses\n") +
+  stat_smooth(method = "lm") +
+  geom_vline(xintercept = 0.01, linetype = 2)
+ggsave(filename = "out/graphs/report/corr-probs-onions-by-day.pdf",
+  width = 5, height = 3, dpi = 100)
+
+# Graph extrapolated network totals.
+# Step 1: per report, zero out values from relays with a fraction of 0.
+totals_per_report <- data.frame(date = as.Date(h$stats_end),
+  traffic = ifelse(h$prob_rend_point == 0, 0,
+    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)),
+  prob_rend_point = h$prob_rend_point,
+  onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen),
+  prob_onion = h$frac_hsdesc * 4.0)
+# Step 2: sum reported values and fractions per day.
+totals_per_day <- aggregate(list(traffic = totals_per_report$traffic,
+  prob_rend_point = totals_per_report$prob_rend_point,
+  onions = totals_per_report$onions,
+  prob_onion = totals_per_report$prob_onion),
+  by = list(date = totals_per_report$date), FUN = sum)
+# Step 3: divide the daily sums by the daily fractions; days whose total
+# probability is below 1% are set to 0 and removed further down.
+h6 <- data.frame(date = totals_per_day$date,
+  traffic = ifelse(totals_per_day$prob_rend_point < 0.01, 0,
+    totals_per_day$traffic / totals_per_day$prob_rend_point),
+  onions = ifelse(totals_per_day$prob_onion / 12.0 < 0.01, 0,
+    totals_per_day$onions / totals_per_day$prob_onion))
+h6 <- melt(h6, id.vars = "date")
+h6 <- h6[h6$value > 0, ]
+# One undated row with value 0 per variable; presumably this is here to
+# make each y axis start at zero.
+h6 <- rbind(h6, data.frame(date = NA, variable = c("traffic", "onions"),
+  value = 0))
+h6 <- data.frame(date = h6$date,
+  variable = ifelse(h6$variable == "traffic", "total traffic in MB/s",
+    ".onion addresses"), value = h6$value)
+ggplot(data = h6, mapping = aes(x = date, y = value)) +
+  facet_grid(variable ~ ., scales = "free_y") +
+  geom_point() +
+  stat_smooth() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "Extrapolated network totals\n")
+ggsave(filename = "out/graphs/report/extrapolated-network-totals.pdf",
+  width = 10, height = 5, dpi = 100)
+
+# Slide version of the extrapolated number of .onion addresses; reuses
+# the rows of h6 that carry the ".onion addresses" label.
+is_onion_row <- h6$variable == ".onion addresses"
+h11 <- h6[is_onion_row, ]
+ggplot(data = h11, mapping = aes(x = date, y = value)) +
+  geom_point() +
+  stat_smooth() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "")
+ggsave(filename = "out/graphs/slides/hidserv-13.png", width = 8,
+  height = 3, dpi = 100)
+
+# Graph extrapolated fraction of hidden-service traffic.
+# Keep only bandwidth rows with empty isexit and isguard columns that are
+# dated after 2014-12-20.
+b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE)
+b <- b[b$isexit == "" & b$isguard == "" & b$date > "2014-12-20", ]
+# Extrapolate daily hidden-service traffic in MB/s; days with a total
+# rendezvous-point probability below 1% are set to 0 and removed.
+traffic_per_report <- data.frame(date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  prob_rend_point = h$prob_rend_point)
+traffic_per_day <- aggregate(list(traffic = traffic_per_report$traffic,
+  prob_rend_point = traffic_per_report$prob_rend_point),
+  by = list(date = traffic_per_report$date), FUN = sum)
+h10 <- data.frame(date = traffic_per_day$date,
+  traffic = ifelse(traffic_per_day$prob_rend_point < 0.01, 0,
+    traffic_per_day$traffic / traffic_per_day$prob_rend_point))
+h10 <- melt(h10, id.vars = "date")
+h10 <- h10[h10$value > 0, ]
+# Append bandwidth (read plus written) as a second variable, then put
+# both variables side by side and keep only days that have both.
+h10 <- rbind(h10, data.frame(date = as.Date(b$date), variable = "bw",
+  value = b$bwread + b$bwwrite))
+h10 <- cast(h10, date ~ variable, value = "value")
+h10 <- na.omit(h10)
+h10 <- data.frame(date = h10$date,
+  value = h10$traffic * 1000 * 1000 / h10$bw)
+# Undated row with value 0; presumably this is here to make the y axis
+# start at zero.
+h10 <- rbind(h10, data.frame(date = NA, value = 0))
+ggplot(data = h10, mapping = aes(x = date, y = value)) +
+  geom_point() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "", labels = percent) +
+  stat_smooth()
+ggsave(filename = "out/graphs/slides/hidserv-14.png", width = 8,
+  height = 3, dpi = 100)
+
+# Graph simulation results for cells on rendezvous circuits: median and
+# 2.5%/97.5% percentiles as relative deviation from the simulated total.
+s <- read.csv("out/csv/sim-cells.csv")
+# Reference total that the percentiles are compared against.
+sim_total_cells <- 1e10
+ggplot(data = s, mapping = aes(x = frac,
+    y = (p500 - sim_total_cells) / sim_total_cells,
+    ymin = (p025 - sim_total_cells) / sim_total_cells,
+    ymax = (p975 - sim_total_cells) / sim_total_cells)) +
+  geom_line() +
+  geom_ribbon(alpha = 0.2) +
+  scale_x_continuous(name = "\nRendezvous points included in extrapolation",
+    labels = percent) +
+  scale_y_continuous(name = "Deviation from network totals\n",
+    labels = percent)
+ggsave(filename = "out/graphs/report/sim-cells.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Graph simulation results for .onion addresses: median and 2.5%/97.5%
+# percentiles as relative deviation from the simulated total.
+o <- read.csv("out/csv/sim-onions.csv")
+# Reference total that the percentiles are compared against.
+sim_total_onions <- 40000
+ggplot(data = o, mapping = aes(x = frac,
+    y = (p500 - sim_total_onions) / sim_total_onions,
+    ymin = (p025 - sim_total_onions) / sim_total_onions,
+    ymax = (p975 - sim_total_onions) / sim_total_onions)) +
+  geom_line() +
+  geom_ribbon(alpha = 0.2) +
+  scale_x_continuous(name = "\nDirectories included in extrapolation",
+    labels = percent) +
+  scale_y_continuous(name = "Deviation from network totals\n",
+    labels = percent)
+ggsave(filename = "out/graphs/report/sim-onions.pdf", width = 5,
+  height = 3, dpi = 100)
+
diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java
new file mode 100644
index 0000000..100520d
--- /dev/null
+++ b/task-13192/src/java/ExtrapolateHidServStats.java
@@ -0,0 +1,722 @@
+import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.math.BigInteger;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Scanner;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorFile;
+import org.torproject.descriptor.DescriptorReader;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.ExtraInfoDescriptor;
+import org.torproject.descriptor.NetworkStatusEntry;
+import org.torproject.descriptor.RelayNetworkStatusConsensus;
+
+public class ExtrapolateHidServStats {
+
+ /* Input directory with archived extra-info descriptors. */
+ private static File archiveExtraInfosDirectory =
+ new File("in/collector/archive/relay-descriptors/extra-infos/");
+
+ /* Input directory with recent extra-info descriptors. */
+ private static File recentExtraInfosDirectory =
+ new File("in/collector/recent/relay-descriptors/extra-infos/");
+
+ /* Input directory with archived consensuses. */
+ private static File archiveConsensuses =
+ new File("in/collector/archive/relay-descriptors/consensuses/");
+
+ /* Input directory with recent consensuses. */
+ private static File recentConsensuses =
+ new File("in/collector/recent/relay-descriptors/consensuses/");
+
+ /* Output file with one line per reported statistics interval. */
+ private static File hidservStatsCsvFile =
+ new File("out/csv/hidserv-stats.csv");
+
+ /* Output file with simulation results for rendezvous cells. */
+ private static File simCellsCsvFile =
+ new File("out/csv/sim-cells.csv");
+
+ /* Output file with simulation results for .onion addresses. */
+ private static File simOnionsCsvFile =
+ new File("out/csv/sim-onions.csv");
+
+  /* Run all steps in order: parse extra-info descriptors, compute
+   * per-relay fractions from consensuses, write the combined .csv file,
+   * and run both simulations.  Progress is printed to stdout. */
+  public static void main(String[] args) throws Exception {
+
+    /* Step 1: reported statistics, keyed by relay fingerprint. */
+    System.out.println("Extracting hidserv-* lines from extra-info "
+        + "descriptors...");
+    SortedMap<String, SortedSet<HidServStats>> statsByRelay =
+        extractHidServStats();
+
+    /* Step 2: consensus fractions, only for relays that reported stats. */
+    System.out.println("Extracting fractions from consensuses...");
+    SortedMap<String, SortedSet<ConsensusFraction>> fractionsByRelay =
+        extractConsensusFractions(statsByRelay.keySet());
+
+    /* Step 3: combine both and write the result. */
+    System.out.println("Extrapolating statistics...");
+    extrapolateHidServStats(statsByRelay, fractionsByRelay);
+
+    /* Steps 4 and 5: simulations. */
+    System.out.println("Simulating extrapolation of rendezvous cells...");
+    simulateCells();
+    System.out.println("Simulating extrapolation of .onions...");
+    simulateOnions();
+
+    System.out.println("Terminating.");
+  }
+
+  /* Strict (non-lenient) UTC date-time format used for parsing and
+   * writing timestamps. */
+  private static final DateFormat DATE_TIME_FORMAT;
+
+  static {
+    SimpleDateFormat utcDateTimeFormat =
+        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    utcDateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    utcDateTimeFormat.setLenient(false);
+    DATE_TIME_FORMAT = utcDateTimeFormat;
+  }
+
+ private static class HidServStats implements Comparable<HidServStats> {
+
+ /* Hidden-service statistics end timestamp in milliseconds. */
+ private long statsEndMillis;
+
+ /* Statistics interval length in seconds. */
+ private long statsIntervalSeconds;
+
+ /* Number of relayed cells reported by the relay and adjusted by
+ * rounding to the nearest right side of a bin and subtracting half of
+ * the bin size. */
+ private long rendRelayedCells;
+
+ /* Number of .onions reported by the relay and adjusted by rounding to
+ * the nearest right side of a bin and subtracting half of the bin
+ * size. */
+ private long dirOnionsSeen;
+
+ private HidServStats(long statsEndMillis, long statsIntervalSeconds,
+ long rendRelayedCells, long dirOnionsSeen) {
+ this.statsEndMillis = statsEndMillis;
+ this.statsIntervalSeconds = statsIntervalSeconds;
+ this.rendRelayedCells = rendRelayedCells;
+ this.dirOnionsSeen = dirOnionsSeen;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof HidServStats)) {
+ return false;
+ }
+ HidServStats other = (HidServStats) otherObject;
+ return this.statsEndMillis == other.statsEndMillis &&
+ this.statsIntervalSeconds == other.statsIntervalSeconds &&
+ this.rendRelayedCells == other.rendRelayedCells &&
+ this.dirOnionsSeen == other.dirOnionsSeen;
+ }
+
+ @Override
+ public int compareTo(HidServStats other) {
+ return this.statsEndMillis < other.statsEndMillis ? -1 :
+ this.statsEndMillis > other.statsEndMillis ? 1 : 0;
+ }
+ }
+
+  /* Extract fingerprint and hidserv-* lines from extra-info descriptors
+   * located in
+   * in/collector/{archive,recent}/relay-descriptors/extra-infos/.
+   *
+   * Returns a map from relay fingerprint to that relay's statistics.
+   * Descriptors without any hidserv-* line are skipped silently,
+   * descriptors with only some of the expected lines are skipped with a
+   * warning on stderr, and descriptors with unparseable timestamps or
+   * numbers are skipped after printing the stack trace. */
+  private static SortedMap<String, SortedSet<HidServStats>>
+      extractHidServStats() {
+    SortedMap<String, SortedSet<HidServStats>> extractedHidServStats =
+        new TreeMap<String, SortedSet<HidServStats>>();
+    DescriptorReader descriptorReader =
+        DescriptorSourceFactory.createDescriptorReader();
+    descriptorReader.addDirectory(archiveExtraInfosDirectory);
+    descriptorReader.addDirectory(recentExtraInfosDirectory);
+    Iterator<DescriptorFile> descriptorFiles =
+        descriptorReader.readDescriptors();
+    while (descriptorFiles.hasNext()) {
+      DescriptorFile descriptorFile = descriptorFiles.next();
+      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+        if (!(descriptor instanceof ExtraInfoDescriptor)) {
+          continue;
+        }
+        String fingerprint =
+            ((ExtraInfoDescriptor) descriptor).getFingerprint();
+        Scanner scanner = new Scanner(new ByteArrayInputStream(
+            descriptor.getRawDescriptorBytes()));
+        Long statsEndMillis = null, statsIntervalSeconds = null,
+            rendRelayedCells = null, dirOnionsSeen = null;
+        try {
+          /* Read line by line; hasNextLine() is the matching check for
+           * nextLine(). */
+          while (scanner.hasNextLine()) {
+            String line = scanner.nextLine();
+            if (!line.startsWith("hidserv-")) {
+              continue;
+            }
+            String[] parts = line.split(" ");
+            if (parts[0].equals("hidserv-stats-end")) {
+              if (parts.length != 5 || !parts[3].startsWith("(") ||
+                  !parts[4].equals("s)")) {
+                /* Will warn below, because statsEndMillis and
+                 * statsIntervalSeconds are still null. */
+                continue;
+              }
+              statsEndMillis = DATE_TIME_FORMAT.parse(
+                  parts[1] + " " + parts[2]).getTime();
+              statsIntervalSeconds =
+                  Long.parseLong(parts[3].substring(1));
+            } else if (parts[0].equals("hidserv-rend-relayed-cells")) {
+              if (parts.length != 5 ||
+                  !parts[4].startsWith("bin_size=")) {
+                /* Will warn below, because rendRelayedCells is still
+                 * null. */
+                continue;
+              }
+              rendRelayedCells = removeNoise(Long.parseLong(parts[1]),
+                  Long.parseLong(parts[4].substring(9)));
+            } else if (parts[0].equals("hidserv-dir-onions-seen")) {
+              if (parts.length != 5 ||
+                  !parts[4].startsWith("bin_size=")) {
+                /* Will warn below, because dirOnionsSeen is still
+                 * null. */
+                continue;
+              }
+              dirOnionsSeen = removeNoise(Long.parseLong(parts[1]),
+                  Long.parseLong(parts[4].substring(9)));
+            }
+          }
+        } catch (ParseException e) {
+          e.printStackTrace();
+          continue;
+        } catch (NumberFormatException e) {
+          e.printStackTrace();
+          continue;
+        } finally {
+          /* Release the scanner on every path, including the skips. */
+          scanner.close();
+        }
+        if (statsEndMillis == null && statsIntervalSeconds == null &&
+            rendRelayedCells == null && dirOnionsSeen == null) {
+          /* No hidserv-* lines in this descriptor at all. */
+          continue;
+        } else if (statsEndMillis != null && statsIntervalSeconds != null
+            && rendRelayedCells != null && dirOnionsSeen != null) {
+          if (!extractedHidServStats.containsKey(fingerprint)) {
+            extractedHidServStats.put(fingerprint,
+                new TreeSet<HidServStats>());
+          }
+          extractedHidServStats.get(fingerprint).add(new HidServStats(
+              statsEndMillis, statsIntervalSeconds, rendRelayedCells,
+              dirOnionsSeen));
+        } else {
+          System.err.println("Relay " + fingerprint + " published "
+              + "incomplete hidserv-stats.  Ignoring.");
+        }
+      }
+    }
+    return extractedHidServStats;
+  }
+
+  /* Undo binning as well as possible: round the reported number to the
+   * nearest multiple of the bin size (the right side of a bin), then
+   * subtract half of the bin size.
+   *
+   * Reported numbers can be negative, so the rounding uses floor
+   * division.  Java's integer division truncates toward zero, which would
+   * round negative reports to the wrong bin edge, for example -600 with
+   * bin size 1024 to 0 rather than -1024.
+   *
+   * Throws ArithmeticException if binSize is 0. */
+  private static long removeNoise(long reportedNumber, long binSize) {
+    long shifted = reportedNumber + binSize / 2;
+    long binIndex = shifted / binSize;
+    /* Turn truncation into floor: step down by one when the division was
+     * inexact and the exact quotient is negative. */
+    if (shifted % binSize != 0 && ((shifted < 0) != (binSize < 0))) {
+      binIndex--;
+    }
+    long roundedToNearestRightSideOfTheBin = binIndex * binSize;
+    long subtractedHalfOfBinSize =
+        roundedToNearestRightSideOfTheBin - binSize / 2;
+    return subtractedHalfOfBinSize;
+  }
+
+ private static class ConsensusFraction
+ implements Comparable<ConsensusFraction> {
+
+ /* Valid-after timestamp of the consensus in milliseconds. */
+ private long validAfterMillis;
+
+ /* Fresh-until timestamp of the consensus in milliseconds. */
+ private long freshUntilMillis;
+
+ /* Fraction of consensus weight in [0.0, 1.0] of this relay. */
+ private double fractionConsensusWeight;
+
+ /* Probability for being selected by clients as rendezvous point. */
+ private double probabilityRendezvousPoint;
+
+ /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay
+ * has been responsible for. This is the "distance" from the
+ * fingerprint of the relay three HSDir positions earlier in the ring
+ * to the fingerprint of this relay. Fractions of all HSDirs in a
+ * consensus add up to 3.0, not 1.0. */
+ private double fractionResponsibleDescriptors;
+
+ private ConsensusFraction(long validAfterMillis,
+ long freshUntilMillis,
+ double fractionConsensusWeight,
+ double probabilityRendezvousPoint,
+ double fractionResponsibleDescriptors) {
+ this.validAfterMillis = validAfterMillis;
+ this.freshUntilMillis = freshUntilMillis;
+ this.fractionConsensusWeight = fractionConsensusWeight;
+ this.probabilityRendezvousPoint = probabilityRendezvousPoint;
+ this.fractionResponsibleDescriptors =
+ fractionResponsibleDescriptors;
+ }
+
+ @Override
+ public boolean equals(Object otherObject) {
+ if (!(otherObject instanceof ConsensusFraction)) {
+ return false;
+ }
+ ConsensusFraction other = (ConsensusFraction) otherObject;
+ return this.validAfterMillis == other.validAfterMillis &&
+ this.freshUntilMillis == other.freshUntilMillis &&
+ this.fractionResponsibleDescriptors ==
+ other.fractionResponsibleDescriptors &&
+ this.fractionConsensusWeight == other.fractionConsensusWeight &&
+ this.probabilityRendezvousPoint ==
+ other.probabilityRendezvousPoint;
+ }
+
+ @Override
+ public int compareTo(ConsensusFraction other) {
+ return this.validAfterMillis < other.validAfterMillis ? -1 :
+ this.validAfterMillis > other.validAfterMillis ? 1 : 0;
+ }
+ }
+
+ /* Extract fractions that relays were responsible for from consensuses
+ * located in in/collector/{archive,recent}/relay-descriptors/consensuses/.
+ *
+ * For every consensus read and every fingerprint in the given
+ * collection, one ConsensusFraction is added to the returned map. A
+ * relay that is not listed in a consensus gets fractions of 0.0 for that
+ * consensus. Consensuses lacking any of the Wmg, Wmm, Wme, Wmd weights
+ * are skipped with a warning on stderr. */
+ private static SortedMap<String, SortedSet<ConsensusFraction>>
+ extractConsensusFractions(Collection<String> fingerprints) {
+ SortedMap<String, SortedSet<ConsensusFraction>>
+ extractedConsensusFractions =
+ new TreeMap<String, SortedSet<ConsensusFraction>>();
+ DescriptorReader descriptorReader =
+ DescriptorSourceFactory.createDescriptorReader();
+ descriptorReader.addDirectory(archiveConsensuses);
+ descriptorReader.addDirectory(recentConsensuses);
+ Iterator<DescriptorFile> descriptorFiles =
+ descriptorReader.readDescriptors();
+ while (descriptorFiles.hasNext()) {
+ DescriptorFile descriptorFile = descriptorFiles.next();
+ for (Descriptor descriptor : descriptorFile.getDescriptors()) {
+ if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
+ continue;
+ }
+ RelayNetworkStatusConsensus consensus =
+ (RelayNetworkStatusConsensus) descriptor;
+ /* Whatever is left in weightKeys after removing the keys present in
+ * the consensus is missing from it. */
+ SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList(
+ "Wmg,Wmm,Wme,Wmd".split(",")));
+ weightKeys.removeAll(consensus.getBandwidthWeights().keySet());
+ if (!weightKeys.isEmpty()) {
+ System.err.println("Consensus with valid-after time "
+ + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis())
+ + " doesn't contain expected Wmx weights. Skipping.");
+ continue;
+ }
+ /* Divide the weights from the consensus by 10000 to obtain the
+ * factors applied to relay bandwidths below. */
+ double wmg = ((double) consensus.getBandwidthWeights().get("Wmg"))
+ / 10000.0;
+ double wmm = ((double) consensus.getBandwidthWeights().get("Wmm"))
+ / 10000.0;
+ double wme = ((double) consensus.getBandwidthWeights().get("Wme"))
+ / 10000.0;
+ double wmd = ((double) consensus.getBandwidthWeights().get("Wmd"))
+ / 10000.0;
+ /* Sorted in reverse order, so that iterating over a tail set walks
+ * towards smaller fingerprints, i.e., earlier ring positions. */
+ SortedSet<String> hsDirs = new TreeSet<String>(
+ Collections.reverseOrder());
+ long totalConsensusWeight = 0L;
+ double totalWeightsRendezvousPoint = 0.0;
+ SortedMap<String, Double> weightsRendezvousPoint =
+ new TreeMap<String, Double>();
+ /* First pass over all relays in the consensus: collect HSDirs, sum
+ * up consensus weights, and compute rendezvous-point weights. */
+ for (Map.Entry<String, NetworkStatusEntry> e :
+ consensus.getStatusEntries().entrySet()) {
+ String fingerprint = e.getKey();
+ NetworkStatusEntry statusEntry = e.getValue();
+ SortedSet<String> flags = statusEntry.getFlags();
+ if (flags.contains("HSDir")) {
+ hsDirs.add(statusEntry.getFingerprint());
+ }
+ totalConsensusWeight += statusEntry.getBandwidth();
+ /* Only relays with the Fast flag get a non-zero rendezvous-point
+ * weight; the factor depends on the Guard and Exit flags. */
+ double weightRendezvousPoint = 0.0;
+ if (flags.contains("Fast")) {
+ weightRendezvousPoint = (double) statusEntry.getBandwidth();
+ if (flags.contains("Guard") && flags.contains("Exit")) {
+ weightRendezvousPoint *= wmd;
+ } else if (flags.contains("Guard")) {
+ weightRendezvousPoint *= wmg;
+ } else if (flags.contains("Exit")) {
+ weightRendezvousPoint *= wme;
+ } else {
+ weightRendezvousPoint *= wmm;
+ }
+ }
+ weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint);
+ totalWeightsRendezvousPoint += weightRendezvousPoint;
+ }
+ /* Add all HSDir fingerprints with leading "0" and "1" to
+ * simplify the logic to traverse the ring start. */
+ SortedSet<String> hsDirsCopy = new TreeSet<String>(hsDirs);
+ hsDirs.clear();
+ for (String fingerprint : hsDirsCopy) {
+ hsDirs.add("0" + fingerprint);
+ hsDirs.add("1" + fingerprint);
+ }
+ /* 16^40, the number of possible 40-hex-character fingerprints. */
+ final double RING_SIZE = new BigInteger(
+ "10000000000000000000000000000000000000000",
+ 16).doubleValue();
+ /* Second pass over the requested fingerprints only. */
+ for (String fingerprint : fingerprints) {
+ double probabilityRendezvousPoint = 0.0,
+ fractionResponsibleDescriptors = 0.0,
+ fractionConsensusWeight = 0.0;
+ NetworkStatusEntry statusEntry =
+ consensus.getStatusEntry(fingerprint);
+ if (statusEntry != null) {
+ if (hsDirs.contains("1" + fingerprint)) {
+ /* Walk from this relay's "1"-prefixed entry to the entry three
+ * positions further along the reverse-ordered set; the loop
+ * ends early if fewer entries remain. */
+ String startResponsible = fingerprint;
+ int positionsToGo = 3;
+ for (String hsDirFingerprint :
+ hsDirs.tailSet("1" + fingerprint)) {
+ startResponsible = hsDirFingerprint;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+ /* Distance between the two ring positions as a fraction of the
+ * whole ring. */
+ fractionResponsibleDescriptors =
+ new BigInteger("1" + fingerprint, 16).subtract(
+ new BigInteger(startResponsible, 16)).doubleValue()
+ / RING_SIZE;
+ }
+ fractionConsensusWeight =
+ ((double) statusEntry.getBandwidth())
+ / ((double) totalConsensusWeight);
+ probabilityRendezvousPoint =
+ weightsRendezvousPoint.get(fingerprint)
+ / totalWeightsRendezvousPoint;
+ }
+ if (!extractedConsensusFractions.containsKey(fingerprint)) {
+ extractedConsensusFractions.put(fingerprint,
+ new TreeSet<ConsensusFraction>());
+ }
+ extractedConsensusFractions.get(fingerprint).add(
+ new ConsensusFraction(consensus.getValidAfterMillis(),
+ consensus.getFreshUntilMillis(), fractionConsensusWeight,
+ probabilityRendezvousPoint,
+ fractionResponsibleDescriptors));
+ }
+ }
+ }
+ return extractedConsensusFractions;
+ }
+
+  /* Combine reported statistics with consensus fractions and write one
+   * .csv line per relay and statistics interval to hidservStatsCsvFile.
+   *
+   * For each statistics interval, the rendezvous-point probabilities and
+   * descriptor fractions of all consensuses with a valid-after time in
+   * [stats start, stats end) are averaged.  Relays without any consensus
+   * fractions, and intervals without a single matching consensus, are
+   * skipped with a warning on stderr; the latter would otherwise divide
+   * by zero and put NaN into the file.
+   *
+   * Numbers are formatted with Locale.US so that the decimal separator is
+   * always a period, whatever the default locale is; a decimal comma
+   * would break the comma-separated format. */
+  private static void extrapolateHidServStats(
+      SortedMap<String, SortedSet<HidServStats>> hidServStats,
+      SortedMap<String, SortedSet<ConsensusFraction>>
+      consensusFractions) throws Exception {
+    hidservStatsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(
+        new FileWriter(hidservStatsCsvFile));
+    try {
+      bw.write("fingerprint,stats_start,stats_end,"
+          + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen,"
+          + "prob_rend_point,frac_hsdesc\n");
+      for (Map.Entry<String, SortedSet<HidServStats>> e :
+          hidServStats.entrySet()) {
+        String fingerprint = e.getKey();
+        if (!consensusFractions.containsKey(fingerprint)) {
+          System.err.println("We have hidserv-stats but no consensus "
+              + "fractions for " + fingerprint + ".  Skipping.");
+          continue;
+        }
+        for (HidServStats stats : e.getValue()) {
+          long statsStartMillis = stats.statsEndMillis
+              - stats.statsIntervalSeconds * 1000L;
+          double sumProbabilityRendezvousPoint = 0.0,
+              sumResponsibleDescriptors = 0.0;
+          int statusEntries = 0;
+          for (ConsensusFraction frac :
+              consensusFractions.get(fingerprint)) {
+            if (statsStartMillis <= frac.validAfterMillis &&
+                frac.validAfterMillis < stats.statsEndMillis) {
+              sumProbabilityRendezvousPoint +=
+                  frac.probabilityRendezvousPoint;
+              sumResponsibleDescriptors +=
+                  frac.fractionResponsibleDescriptors;
+              statusEntries++;
+            }
+          }
+          if (statusEntries == 0) {
+            /* Nothing to average over; avoid writing NaN. */
+            System.err.println("We have hidserv-stats but no consensus "
+                + "in the statistics interval ending at "
+                + DATE_TIME_FORMAT.format(stats.statsEndMillis) + " for "
+                + fingerprint + ".  Skipping.");
+            continue;
+          }
+          bw.write(String.format(java.util.Locale.US,
+              "%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint,
+              DATE_TIME_FORMAT.format(statsStartMillis),
+              DATE_TIME_FORMAT.format(stats.statsEndMillis),
+              stats.rendRelayedCells, stats.dirOnionsSeen,
+              sumProbabilityRendezvousPoint / statusEntries,
+              sumResponsibleDescriptors / statusEntries));
+        }
+      }
+    } finally {
+      /* Close the file even if writing fails half-way. */
+      bw.close();
+    }
+  }
+
+ private static Random rnd = new Random(3);
+
+ private static void simulateCells() throws Exception {
+
+ /* Generate consensus weights following an exponential distribution
+ * with lambda = 1 for 3000 potential rendezvous points. */
+ final int numberRendPoints = 3000;
+ double[] consensusWeights = new double[numberRendPoints];
+ double totalConsensusWeight = 0.0;
+ for (int i = 0; i < numberRendPoints; i++) {
+ double consensusWeight = -Math.log(1.0 - rnd.nextDouble());
+ consensusWeights[i] = consensusWeight;
+ totalConsensusWeight += consensusWeight;
+ }
+
+ /* Compute probabilities for being selected as rendezvous point. */
+ double[] probRendPoint = new double[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ probRendPoint[i] = consensusWeights[i] / totalConsensusWeight;
+ }
+
+ /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks
+ * following an exponential distribution with lambda = 0.00001 and
+ * randomly assign them to a rendezvous point to report them later. */
+ long cellsLeft = 10000000000L;
+ final double cellsLambda = 0.00001;
+ long[] observedCells = new long[numberRendPoints];
+ while (cellsLeft > 0) {
+ long cells = (long) (-Math.log(1.0 - rnd.nextDouble())
+ / cellsLambda);
+ double selectRendPoint = rnd.nextDouble();
+ for (int i = 0; i < probRendPoint.length; i++) {
+ selectRendPoint -= probRendPoint[i];
+ if (selectRendPoint <= 0.0) {
+ observedCells[i] += cells;
+ break;
+ }
+ }
+ cellsLeft -= cells;
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 1024L;
+ final double b = 2048.0 / 0.3;
+ long[] reportedCells = new long[numberRendPoints];
+ long[] removedNoiseCells = new long[numberRendPoints];
+ for (int i = 0; i < numberRendPoints; i++) {
+ long observed = observedCells[i];
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double p = rnd.nextDouble();
+ double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+ Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedCells[i] = reported;
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseCells[i] = subtractedHalfOfBinSize;
+ }
+
+ /* Perform 10,000 extrapolations from random fractions of reports by
+ * probability to be selected as rendezvous point. */
+ simCellsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simCellsCsvFile));
+ bw.write("frac,p025,p500,p975\n");
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ final int numberOfExtrapolations = 10000;
+ for (double fraction : fractions) {
+ List<Long> extrapolations = new ArrayList<Long>();
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ SortedSet<Integer> nonReportingRelays = new TreeSet<Integer>();
+ for (int j = 0; j < numberRendPoints; j++) {
+ nonReportingRelays.add(j);
+ }
+ List<Integer> shuffledRelays = new ArrayList<Integer>(
+ nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Integer> reportingRelays = new TreeSet<Integer>();
+ for (int j = 0; j < (int) ((double) numberRendPoints * fraction);
+ j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ double reportingProbability;
+ long totalReports;
+ do {
+ reportingProbability = 0.0;
+ totalReports = 0L;
+ for (int reportingRelay : reportingRelays) {
+ reportingProbability += probRendPoint[reportingRelay];
+ totalReports += removedNoiseCells[reportingRelay];
+ }
+ if (reportingProbability < fraction - 0.001) {
+ int addRelay = new ArrayList<Integer>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (reportingProbability > fraction + 0.001) {
+ int removeRelay = new ArrayList<Integer>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (reportingProbability < fraction - 0.001 ||
+ reportingProbability > fraction + 0.001);
+ extrapolations.add((long) ((double) totalReports
+ / reportingProbability));
+ }
+ Collections.sort(extrapolations);
+ long p025 = extrapolations.get((extrapolations.size() * 25) / 1000),
+ p500 = extrapolations.get((extrapolations.size() * 500) / 1000),
+ p975 = extrapolations.get((extrapolations.size() * 975) / 1000);
+ bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500,
+ p975));
+ }
+ bw.close();
+ }
+
+ private static void simulateOnions() throws Exception {
+
+ /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+ final int numberHsDirs = 3000;
+ SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+ for (int i = 0; i < numberHsDirs; i++) {
+ hsDirFingerprints.add(rnd.nextDouble());
+ }
+
+ /* Compute fractions of observed descriptor space. */
+ SortedSet<Double> ring =
+ new TreeSet<Double>(Collections.reverseOrder());
+ for (double fingerprint : hsDirFingerprints) {
+ ring.add(fingerprint);
+ ring.add(fingerprint - 1.0);
+ }
+ SortedMap<Double, Double> hsDirFractions =
+ new TreeMap<Double, Double>();
+ for (double fingerprint : hsDirFingerprints) {
+ double start = fingerprint;
+ int positionsToGo = 3;
+ for (double prev : ring.tailSet(fingerprint)) {
+ start = prev;
+ if (positionsToGo-- <= 0) {
+ break;
+ }
+ }
+ hsDirFractions.put(fingerprint, fingerprint - start);
+ }
+
+ /* Generate 40000 .onions with 4 HSDesc IDs, store them on HSDirs. */
+ final int numberOnions = 40000;
+ final int replicas = 4;
+ final int storeOnDirs = 3;
+ SortedMap<Double, SortedSet<Integer>> storedDescs =
+ new TreeMap<Double, SortedSet<Integer>>();
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.put(fingerprint, new TreeSet<Integer>());
+ }
+ for (int i = 0; i < numberOnions; i++) {
+ for (int j = 0; j < replicas; j++) {
+ int leftToStore = storeOnDirs;
+ for (double fingerprint :
+ hsDirFingerprints.tailSet(rnd.nextDouble())) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
+ if (leftToStore > 0) {
+ for (double fingerprint : hsDirFingerprints) {
+ storedDescs.get(fingerprint).add(i);
+ if (--leftToStore <= 0) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Obfuscate reports using binning and Laplace noise, and then attempt
+ * to remove noise again. */
+ final long binSize = 8L;
+ final double b = 8.0 / 0.3;
+ SortedMap<Double, Long> reportedOnions = new TreeMap<Double, Long>(),
+ removedNoiseOnions = new TreeMap<Double, Long>();
+ for (Map.Entry<Double, SortedSet<Integer>> e :
+ storedDescs.entrySet()) {
+ double fingerprint = e.getKey();
+ long observed = (long) e.getValue().size();
+ long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+ double p = rnd.nextDouble();
+ double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+ Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+ long reported = afterBinning + (long) laplaceNoise;
+ reportedOnions.put(fingerprint, reported);
+ long roundedToNearestRightSideOfTheBin =
+ ((reported + binSize / 2) / binSize) * binSize;
+ long subtractedHalfOfBinSize =
+ roundedToNearestRightSideOfTheBin - binSize / 2;
+ removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+ }
+
+ /* Perform 10,000 extrapolations from random fractions of reports by
+ * probability to be selected as rendezvous point. */
+ simOnionsCsvFile.getParentFile().mkdirs();
+ BufferedWriter bw = new BufferedWriter(new FileWriter(
+ simOnionsCsvFile));
+ bw.write("frac,p025,p500,p975\n");
+ double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1,
+ 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+ final int numberOfExtrapolations = 10000;
+ for (double fraction : fractions) {
+ List<Long> extrapolationsTwo = new ArrayList<Long>();
+ for (int i = 0; i < numberOfExtrapolations; i++) {
+ SortedSet<Double> nonReportingRelays =
+ new TreeSet<Double>(hsDirFractions.keySet());
+ List<Double> shuffledRelays = new ArrayList<Double>(
+ nonReportingRelays);
+ Collections.shuffle(shuffledRelays);
+ SortedSet<Double> reportingRelays = new TreeSet<Double>();
+ for (int j = 0; j < (int) ((double) hsDirFractions.size()
+ * fraction); j++) {
+ reportingRelays.add(shuffledRelays.get(j));
+ nonReportingRelays.remove(shuffledRelays.get(j));
+ }
+ double reportingProbability;
+ long totalReports;
+ do {
+ reportingProbability = 0.0;
+ totalReports = 0L;
+ for (double reportingRelay : reportingRelays) {
+ reportingProbability += hsDirFractions.get(reportingRelay)
+ / 3.0;
+ totalReports += removedNoiseOnions.get(reportingRelay);
+ }
+ if (reportingProbability < fraction - 0.001) {
+ double addRelay =
+ new ArrayList<Double>(nonReportingRelays).get(
+ rnd.nextInt(nonReportingRelays.size()));
+ nonReportingRelays.remove(addRelay);
+ reportingRelays.add(addRelay);
+ } else if (reportingProbability > fraction + 0.001) {
+ double removeRelay =
+ new ArrayList<Double>(reportingRelays).get(
+ rnd.nextInt(reportingRelays.size()));
+ reportingRelays.remove(removeRelay);
+ nonReportingRelays.add(removeRelay);
+ }
+ } while (reportingProbability < fraction - 0.001 ||
+ reportingProbability > fraction + 0.001);
+ double totalFraction = 0.0;
+ for (double fingerprint : reportingRelays) {
+ totalFraction += hsDirFractions.get(fingerprint) * 4.0;
+ }
+ extrapolationsTwo.add((long) ((double) totalReports
+ / totalFraction));
+ }
+ Collections.sort(extrapolationsTwo);
+ long pTwo025 = extrapolationsTwo.get(
+ (extrapolationsTwo.size() * 25) / 1000),
+ pTwo500 = extrapolationsTwo.get(
+ (extrapolationsTwo.size() * 500) / 1000),
+ pTwo975 = extrapolationsTwo.get(
+ (extrapolationsTwo.size() * 975) / 1000);
+ bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025,
+ pTwo500, pTwo975));
+ }
+ bw.close();
+ }
+}
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits