[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [ernie/master 2/3] Document server descriptor stats.
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Tue, 9 Mar 2010 14:36:16 +0100
Subject: Document server descriptor stats.
Commit: fe66e8037fd344c235b81d0280ef6134564c591c
---
config | 10 +
src/Configuration.java | 23 ++
src/Main.java | 8 +-
src/ServerDescriptorStatsFileHandler.java | 460 ++++++++++++++++++-----------
4 files changed, 321 insertions(+), 180 deletions(-)
diff --git a/config b/config
index f900bf8..5c202f1 100644
--- a/config
+++ b/config
@@ -36,6 +36,16 @@
## Write bridge stats to disk
#WriteBridgeStats 1
+## Write server descriptors stats to disk
+#WriteServerDescriptorStats 1
+
+## Comma-separated list of relay versions to be included in version-stats
+#RelayVersions 0.1.2,0.2.0,0.2.1,0.2.2
+
+## Comma-separated list of relay platforms to be included in
+## platform-stats
+#RelayPlatforms Linux,Windows,Darwin,FreeBSD
+
## Write directory archives to disk
#WriteDirectoryArchives 0
diff --git a/src/Configuration.java b/src/Configuration.java
index cc90e6e..4ba3044 100644
--- a/src/Configuration.java
+++ b/src/Configuration.java
@@ -17,6 +17,11 @@ public class Configuration {
Arrays.asList(("8522EB98C91496E80EC238E732594D1509158E77,"
+ "9695DFC35FFEB861329B9F1AB04C46397020CE31").split(",")));
private boolean writeBridgeStats = true;
+ private boolean writeServerDescriptorStats = true;
+ private List<String> relayVersions = new ArrayList<String>(Arrays.asList(
+ "0.1.2,0.2.0,0.2.1,0.2.2".split(",")));
+ private List<String> relayPlatforms = new ArrayList<String>(Arrays.asList(
+ "Linux,Windows,Darwin,FreeBSD".split(",")));
private boolean writeDirectoryArchives = false;
private SortedSet<String> v3DirectoryAuthorities = new TreeSet<String>(
Arrays.asList(("14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4,"
@@ -74,6 +79,15 @@ public class Configuration {
} else if (line.startsWith("WriteBridgeStats")) {
this.writeBridgeStats = Integer.parseInt(
line.split(" ")[1]) != 0;
+ } else if (line.startsWith("WriteServerDescriptorStats")) {
+ this.writeServerDescriptorStats = Integer.parseInt(
+ line.split(" ")[1]) != 0;
+ } else if (line.startsWith("RelayVersions")) {
+ this.relayVersions = new ArrayList<String>(
+ Arrays.asList(line.split(" ")[1].split(",")));
+ } else if (line.startsWith("RelayPlatforms")) {
+ this.relayPlatforms = new ArrayList<String>(
+ Arrays.asList(line.split(" ")[1].split(",")));
} else if (line.startsWith("WriteDirectoryArchives")) {
this.writeDirectoryArchives = Integer.parseInt(
line.split(" ")[1]) != 0;
@@ -165,6 +179,15 @@ public class Configuration {
public boolean getWriteBridgeStats() {
return this.writeBridgeStats;
}
+ public boolean getWriteServerDescriptorStats() {
+ return this.writeServerDescriptorStats;
+ }
+ public List<String> getRelayVersions() {
+ return this.relayVersions;
+ }
+ public List<String> getRelayPlatforms() {
+ return this.relayPlatforms;
+ }
public boolean getWriteDirectoryArchives() {
return this.writeDirectoryArchives;
}
diff --git a/src/Main.java b/src/Main.java
index 34d309c..2d96396 100644
--- a/src/Main.java
+++ b/src/Main.java
@@ -34,12 +34,15 @@ public class Main {
DirreqStatsFileHandler dsfh = config.getWriteDirreqStats() ?
new DirreqStatsFileHandler(countries) : null;
ServerDescriptorStatsFileHandler sdsfh =
- new ServerDescriptorStatsFileHandler();
+ config.getWriteServerDescriptorStats() ?
+ new ServerDescriptorStatsFileHandler(config.getRelayVersions(),
+ config.getRelayPlatforms()) : null;
// Prepare relay descriptor parser (only if we are writing the
// stats)
RelayDescriptorParser rdp = config.getWriteConsensusStats() ||
- config.getWriteBridgeStats() || config.getWriteDirreqStats() ?
+ config.getWriteBridgeStats() || config.getWriteDirreqStats() ||
+ config.getWriteServerDescriptorStats() ?
new RelayDescriptorParser(csfh, bsfh, dsfh, sdsfh, countries,
directories) : null;
@@ -122,4 +125,3 @@ public class Main {
logger.info("Terminating ERNIE.");
}
}
-
diff --git a/src/ServerDescriptorStatsFileHandler.java b/src/ServerDescriptorStatsFileHandler.java
index 37e71c3..6aa63b8 100644
--- a/src/ServerDescriptorStatsFileHandler.java
+++ b/src/ServerDescriptorStatsFileHandler.java
@@ -3,84 +3,141 @@ import java.text.*;
import java.util.*;
import java.util.logging.*;
- /**
- * two pieces of information: consensuses referencing N server
- * descriptors that are combined with relay flags (like Running) and
- * server descriptors containing information about tor
- * versions, platforms, and advertised bandwidth. we want stats that
- * combine information from consensuses and server descriptors. in
- * databases this is a n:m relation with n consensus referencing m
- * server descriptors. so, the straightforward way is to keep parse
- * results in 2 tables and join them for extracting statistics.
- * however, we don't want to use a database here. and even if we had
- * a database, the table join would be too expensive to perform after
- * adding new data every hour.
- *
- * the approach we take here is to de-normalize the data and write
- * the join of consensuses and server descriptors into one file that
- * is never kept in memory in the whole. this file has entries for
- * every consensus line referencing a server descriptor and the
- * information we want to use from the references server descriptor,
- * if available. in addition to that, we need a smaller file containing
- * unreferenced server descriptors that we were not able to write to
- * the first file, yet. by implementing the join operation manually,
- * we can make use of the fact that descriptors are not referenced for
- * longer than 24 hours.
- *
- * stats/relay-version-stats:
- * date,v011,v012,v020,v021,v022,other
- *
- * stats/relay-platform-stats:
- * date,windows,sunos,openbsd,netbsd,linux,freebsd,dragonfly,darwin,other
- *
- * stats/relay-bandwidth-stats:
- * date,q1,md,q3
- *
- * read largefile and merge our data in; also generate stats
- * datetime,descriptor,version,platform,advbw
- * 320095,aZ7mNo3lkjf2li34hlkvjsdru2,0.2.1,Darwin,1024
- *
- * TODO future extension: remove lines from server-descriptor-stats-raw
- * as soon as we have written a full day (all consensuses, all SDs).
- */
+/**
+ * Generates statistics about relays in the Tor network from data that
+ * relays write to their server descriptors. Accepts lists of referenced
+ * descriptors in network status consensuses and selected lines from
+ * server descriptors from <code>RelayDescriptorParser</code>. Keeps two
+ * intermediate results files <code>stats/consensuses-raw</code> and
+ * <code>stats/descriptors-raw</code> and writes three final results files
+ * <code>stats/version-stats</code>, <code>stats/platform-stats</code>,
+ * and <code>stats/bandwidth-stats</code>.
+ */
public class ServerDescriptorStatsFileHandler {
+ /**
+ * Intermediate results file <code>stats/consensuses-raw</code>
+ * containing consensuses and the referenced descriptor identities of
+ * relays with the Running flag set. The file format is
+ * "valid-after,descid,descid,descid...\n" for each consensus. Lines are
+ * ordered by valid-after time in ascending order.
+ */
private File consensusesFile;
+
+ /**
+ * Temporary file for writing <code>stats/consensuses-raw</code> while
+ * reading that file at the same time. After read and write operations
+ * are complete, the original file is deleted and the temporary file
+ * renamed to be the new intermediate results file.
+ */
private File consensusesTempFile;
+
+ /**
+ * Intermediate results file <code>stats/descriptors-raw</code>
+ * containing server descriptors with relevant fields for statistics.
+ * The file format is "published,descid,version,platform,advbw\n" for
+ * each server descriptors. Lines are first ordered by published time,
+ * then by descid.
+ */
private File descriptorsFile;
+
+ /**
+ * Temporary file for writing <code>stats/descriptors-raw</code> while
+ * reading that file at the same time. After read and write operations
+ * are complete, the original file is deleted and the temporary file
+ * renamed to be the new intermediate results file.
+ */
private File descriptorsTempFile;
+
+ /**
+ * Final results file <code>stats/version-stats</code> containing
+ * statistics about Tor versions of relays in the network. The file
+ * format is "date,version1,version2,...,other" with versions as
+ * specified in config option RelayVersions.
+ */
private File versionStatsFile;
+
+ /**
+ * Final results file <code>stats/platform-stats</code> containing
+ * statistics about operating systems of relays in the network. The
+ * file format is "date,os1,os2,...,other" with operating systems as
+ * specified in config option RelayPlatforms.
+ */
private File platformStatsFile;
+
+ /**
+ * Final results file <code>stats/bandwidth-stats</code> containing
+ * statistics about the advertised bandwidth of relays in the network.
+ * The file format is "date,advbw".
+ */
private File bandwidthStatsFile;
/**
- * map key "valid-after", map value "valid-after,descid,descid,descid.."
+ * Consensuses and referenced descriptor identities of relays with the
+ * Running flag set. This data structure only holds those consensuses
+ * that were parsed in this execution, not the previously parsed
+ * consensuses as read from disk. Map keys are valid-after times
+ * formatted as "yyyy-MM-dd HH:mm:ss", map values are valid-after times
+ * followed by a comma-separated list of base-64-formatted descriptor
+ * identifiers.
*/
private SortedMap<String, String> consensuses;
/**
- * map key "published,descid"
- * map value "published,descid,version,platform,bandwidth"
+ * Server descriptors with relevant fields for statistics, ordered by
+ * published time and descriptor identifier. Map keys are publication
+ * times of descriptors formatted as "yyyy-MM-dd HH:mm:ss", a comma, and
+ * base-64-formatted descriptor identifiers. An example key is
+ * "2009-09-30 20:42:19,ZQZ5zq4q1U8Uynyk6lkUy5uAsdM" (length 47). Map
+ * values are map keys plus version, platform, and advertised bandwidth
+ * written as "published,descid,version,platform,advbw". Note that the
+ * platform string may contain commas.
*/
private SortedMap<String, String> descriptors;
/**
- * map key "descid"
- * map value "published,descid,version,platform,bandwidth"
+ * Server descriptors as in <code>descriptors</code>, accessible by
+ * descriptor identifiers only, without knowing the publication time.
+ * Map keys are base-64-formatted descriptor identifiers, map values
+ * are formatted as map values in <code>descriptors</code>.
*/
private SortedMap<String, String> descById;
+ /**
+ * Tor relay versions that we care about.
+ */
+ private List<String> relayVersions;
+
+ /**
+ * Platforms (operating systems) that we care about.
+ */
+ private List<String> relayPlatforms;
+
+ /**
+ * Logger for this class.
+ */
private Logger logger;
+ // TODO should there be a modified flag, too?
+
/**
- * Initializes this class, including reading in results file
- * <code>stats/relay-version-stats</code> etc. Not that we don't read in
- * <code>stats/server-descriptors-raw</code>, because it can grow
- * really big!
+ * Initializes this class, without reading in any files. We're only
+ * reading in files when writing results to disk in
+ * <code>writeFiles</code>.
*/
- public ServerDescriptorStatsFileHandler() {
+ public ServerDescriptorStatsFileHandler(List<String> relayVersions,
+ List<String> relayPlatforms) {
- /* init files */
+ /* Memorize versions and platforms that we care about. */
+ this.relayVersions = relayVersions;
+ this.relayPlatforms = relayPlatforms;
+
+ /* Initialize local data structures. */
+ this.consensuses = new TreeMap<String, String>();
+ this.descriptors = new TreeMap<String, String>();
+ this.descById = new TreeMap<String, String>();
+
+ /* Initialize file names for intermediate and final results files. */
this.versionStatsFile = new File("stats/version-stats");
this.platformStatsFile = new File("stats/platform-stats");
this.bandwidthStatsFile = new File("stats/bandwidth-stats");
@@ -89,47 +146,59 @@ public class ServerDescriptorStatsFileHandler {
this.descriptorsFile = new File("stats/descriptors-raw");
this.descriptorsTempFile = new File("stats/descriptors-raw.temp");
- /* Initialize local data structures. */
- this.consensuses = new TreeMap<String, String>();
- this.descriptors = new TreeMap<String, String>();
- this.descById = new TreeMap<String, String>();
-
/* Initialize logger. */
this.logger =
Logger.getLogger(ServerDescriptorStatsFileHandler.class.getName());
- this.logger.fine("Initialized.");
}
- /* Just add to data structure. We cannot check whether we already got
- * it right now. The only thing we can check is whether we got this
- * consensus before in this run. */
+ /**
+ * Adds a consensus to the list with its valid-after time and a list of
+ * descriptor identifiers of relays that have the Running flag set. If
+ * the number of consensuses in memory exceeds a certain number, an
+ * auto-save mechanism is triggered by calling <code>writeFiles</code>.
+ */
public void addConsensus(String validAfter,
String descriptorIdentities) {
- // TODO should there be a modified flag, too?
+
+ /* Add consensus to the list. */
if (!this.consensuses.containsKey(validAfter)) {
- this.logger.finer("Adding");
+ this.logger.finer("Adding consensus published at " + validAfter
+ + ".");
} else {
- this.logger.fine("We already learned about this consensus in this "
- + "run. Overwriting.");
+ this.logger.fine("We already learned about a consensus published "
+ + "at " + validAfter + " in this execution. Overwriting.");
}
this.consensuses.put(validAfter, validAfter + ","
+ descriptorIdentities);
-
- // force autosave if we have too many data; 240 cons ^= 10 days
+
+ /* Check if we have more 240 consensuses in memory (covering 10 days).
+ * If so, trigger the auto-save mechanism. */
if (this.consensuses.size() > 240) {
this.logger.fine("Autosave triggered by adding consensus: We have "
- + this.consensuses.size() + " consensuses and " + this.descriptors.size()
- + " descriptors in memory. Writing to disk now.");
+ + this.consensuses.size() + " consensuses and "
+ + this.descriptors.size() + " descriptors in memory. Writing "
+ + "to disk now.");
this.writeFiles();
}
}
- // version string is the 0.2.1.23 part of the platform string
- // platform is platform string with all parts after { removed
- // advbw is in kibibytes
+ /**
+ * Adds a server descriptor to the list with its identity and the
+ * platform, published, and bandwidth lines. Version and operating
+ * system are parsed from the platform line. The parsed version consists
+ * only of the dotted numbers part (e.g. "0.2.1.2") without any
+ * additions like "-alpha". The operating system is the substring after
+ * " on " up to the first encountered opening curly bracket ("{").
+ * The publication time is extracted from the published line. The
+ * advertised bandwidth is calculated from the bandwidth line by taking
+ * the minimum of average and observed bandwidth, divided by 1024 to
+ * obtain KiB/s.
+ */
public void addServerDescriptor(String descriptorIdentity,
String platformLine, String publishedLine, String bandwidthLine) {
- // TODO should there be a modified flag, too?
+
+ /* Parse version, platform, and advertised bandwidth from the given
+ * lines. */
String version = "", platform = "", published = "", advBw = "";
if (platformLine.contains(" Tor ")) {
version = platformLine.substring(platformLine.indexOf(" Tor ") + 5).
@@ -144,44 +213,56 @@ public class ServerDescriptorStatsFileHandler {
published = publishedLine.substring("published ".length());
String[] bwParts = bandwidthLine.split(" ");
if (bwParts.length == 4) {
- advBw = "" + (Math.min(Long.parseLong(bwParts[1]),
- Long.parseLong(bwParts[3])) / 1024L);
- // TODO can't trust input! verify
+ try {
+ advBw = "" + (Math.min(Long.parseLong(bwParts[1]),
+ Long.parseLong(bwParts[3])) / 1024L);
+ } catch (NumberFormatException e) {
+ this.logger.log(Level.WARNING, "Exception while parsing average "
+ + "and observed bandwidth from line '" + bandwidthLine
+ + "'. Not adding server descriptor!", e);
+ return;
+ }
}
String key = published + "," + descriptorIdentity;
String line = key + "," + version + "," + platform + "," + advBw;
if (!this.descriptors.containsKey(key)) {
- this.logger.finer("Adding");
+ this.logger.finer("Adding server descriptor with identifier "
+ + descriptorIdentity + ".");
} else {
- this.logger.fine("We already learned about this server descriptor "
- + "in this run. Overwriting.");
+ this.logger.fine("We already learned about a server descriptor "
+ + "with identifier " + descriptorIdentity + ", published at "
+ + published + " in this execution. Overwriting.");
}
this.descriptors.put(key, line);
this.descById.put(descriptorIdentity, line);
- // force autosave if we have too many data; 50K descs ^= 10 days in early 2010
+ /* Check if we have more 50K server descriptors in memory (covering 10
+ * days as of early 2010). If so, trigger the auto-save mechanism. */
if (this.descriptors.size() > 50000) {
- this.logger.fine("Autosave triggered by adding descriptor: We have "
- + this.consensuses.size() + " consensuses and " + this.descriptors.size()
- + " descriptors in memory. Writing to disk now.");
+ this.logger.fine("Autosave triggered by adding server descriptor: "
+ + "We have " + this.consensuses.size() + " consensuses and "
+ + this.descriptors.size() + " descriptors in memory. Writing "
+ + "to disk now.");
this.writeFiles();
}
}
/**
- * Writes the newly learned consensuses and server descriptors to disk
- * and merges new findings about relay versions, platforms, and advertised
- * bandwidth with existing stats files.
+ * Merges the newly learned consensuses and server descriptors with the
+ * ones we wrote to disk earlier and extracts new statistics about relay
+ * version, platforms, and advertised bandwidth.
+ *
+ * This method is rather complex, because we can only store a limited
+ * number of consensuses and serer descriptors in memory. Also, we want
+ * to avoid going through the files twice, once for merging old and new
+ * lines and another time for extracting statistics.
*/
- /* why is this so complex? because the data doesn't fit into memory and
- * we want to avoid going through the file more than once (that is,
- * once for reading and once for writing) if at all possible. */
public void writeFiles() {
- // TODO use separate try blocks?
try {
- /* Initialize readers and writers for the two files. We are going to
- * write to temporary files, delete originals, and rename. */
+
+ /* Initialize readers for reading intermediate results files from
+ * disk. */
BufferedReader consensusesReader = null;
if (this.consensusesFile.exists()) {
consensusesReader = new BufferedReader(new FileReader(
@@ -193,48 +274,47 @@ public class ServerDescriptorStatsFileHandler {
this.descriptorsFile));
}
+ /* Prepare writing intermediate results. The idea is to write to
+ * temporary files while reading from the originals, delete the
+ * originals, and rename the temporary files to be the new
+ * originals. */
this.consensusesTempFile.getParentFile().mkdirs();
- BufferedWriter consensusesWriter = new BufferedWriter(new FileWriter(
- this.consensusesTempFile));
- BufferedWriter descriptorsWriter = new BufferedWriter(new FileWriter(
- this.descriptorsTempFile));
- BufferedWriter versionWriter = new BufferedWriter(new FileWriter(
- this.versionStatsFile));
- BufferedWriter platformWriter = new BufferedWriter(new FileWriter(
- this.platformStatsFile));
- BufferedWriter bandwidthWriter = new BufferedWriter(new FileWriter(
- this.bandwidthStatsFile));
+ BufferedWriter consensusesWriter = new BufferedWriter(
+ new FileWriter(this.consensusesTempFile));
+ BufferedWriter descriptorsWriter = new BufferedWriter(
+ new FileWriter(this.descriptorsTempFile));
+ /* Prepare date format parsers. */
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
SimpleDateFormat dateTimeFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ /* Prepare extracting statistics and writing them to disk. */
String statsDate = null;
- // TODO make these configurable
- List<String> versionKeys = new ArrayList<String>(Arrays.asList(
- "0.1.1,0.1.2,0.2.0,0.2.1,0.2.2".split(",")));
- List<String> platformKeys = new ArrayList<String>(Arrays.asList(
- "Windows,SunOS,OpenBSD,NetBSD,Linux,FreeBSD,DragonFly,Darwin".
- split(",")));
+ int[] versionStats = new int[this.relayVersions.size() + 1];
+ int[] platformStats = new int[this.relayPlatforms.size() + 1];
+ long bandwidthStats = 0L;
+ int consensusesAtThisDay = 0;
+ BufferedWriter versionWriter = new BufferedWriter(new FileWriter(
+ this.versionStatsFile));
+ BufferedWriter platformWriter = new BufferedWriter(new FileWriter(
+ this.platformStatsFile));
+ BufferedWriter bandwidthWriter = new BufferedWriter(new FileWriter(
+ this.bandwidthStatsFile));
versionWriter.write("date");
- for (String v : versionKeys) {
+ for (String v : this.relayVersions) {
versionWriter.write("," + v);
}
versionWriter.write(",other\n");
platformWriter.write("date");
- for (String p : platformKeys) {
- platformWriter.write("," + p.toLowerCase());
+ for (String p : this.relayPlatforms) {
+ platformWriter.write("," + p);
}
platformWriter.write(",other\n");
bandwidthWriter.write("date,advbw\n");
- int[] versionStats = new int[versionKeys.size() + 1];
- int[] platformStats = new int[platformKeys.size() + 1];
- long bandwidthStats = 0L;
- int consensusesAtThisDay = 0;
-
/* Always keep one line of the consensuses and descriptors file in
* memory. */
String consensusLine = consensusesReader != null ?
@@ -249,24 +329,25 @@ public class ServerDescriptorStatsFileHandler {
/* Find out which line we want to process now, memorize it for
* parsing below, advance the source from where we got the line,
- * and write the line to disk. Afterwards, line contains
+ * and write the line to disk. Afterwards, variable line contains
* the consensus line we want to parse in this iteration. */
- String line = null; // TODO rename
+ String line = null;
if (consensusLine != null) {
if (!this.consensuses.isEmpty()) {
- String fileVA = consensusLine.split(",")[0];
- String memVA = this.consensuses.firstKey();
- if (fileVA.equals(memVA)) {
- this.logger.finer("We have a consensus line in memory that "
- + "we already knew before. Skipping.");
- // TODO should we compare the two lines here?
+ String fileKey = consensusLine.split(",")[0];
+ String memKey = this.consensuses.firstKey();
+ if (fileKey.equals(memKey)) {
+ this.logger.finer("The consensus we read from disk has the "
+ + "same valid-after time (" + fileKey + ") time as a "
+ + "consensus we have in memory. Using the consensus "
+ + "from memory.");
consensusLine = consensusesReader.readLine();
- continue; // TODO is this correct?
- } else if (fileVA.compareTo(memVA) < 0) {
- line = consensusLine; // TODO rename
+ continue;
+ } else if (fileKey.compareTo(memKey) < 0) {
+ line = consensusLine;
consensusLine = consensusesReader.readLine();
} else {
- line = this.consensuses.remove(memVA);
+ line = this.consensuses.remove(memKey);
}
} else {
line = consensusLine;
@@ -277,8 +358,11 @@ public class ServerDescriptorStatsFileHandler {
}
consensusesWriter.write(line + "\n");
- /* Write all descriptor to disk that were published more than 24
- * hours before this consensus. */
+ /* Write all server descriptors to disk that were published more
+ * than 24 hours before the consensus we're about to process. Also
+ * remove those server descriptors from memory. The idea is that
+ * those server descriptors cannot be referenced from the
+ * consensus anyway and would only bloat our memory. */
String minus24h = dateTimeFormat.format(new Date(
dateTimeFormat.parse(line.split(",")[0]).getTime() -
(24L * 60L * 60L * 1000L)));
@@ -289,18 +373,24 @@ public class ServerDescriptorStatsFileHandler {
compareTo(minus24h) < 0)) {
if (descriptorLine != null) {
if (!this.descriptors.isEmpty()) {
- String filePubl = descriptorLine.substring(0, 47);
- // 47 chars: 19 for datetime, 1 for comma, 27 for descid
- String memPubl = this.descriptors.firstKey();
- if (filePubl.equals(memPubl)) {
- this.logger.finer("same desc. skipping.");
+ /* The first 47 chars contain the publication time (19
+ * chars), a comma (1 char), and the descriptor identifier
+ * (27 chars). */
+ String fileKey = descriptorLine.substring(0, 47);
+ String memKey = this.descriptors.firstKey();
+ if (fileKey.equals(memKey)) {
+ this.logger.finer("The server descriptor we read from "
+ + "disk has the same publication time and identifier "
+ + "(" + fileKey + ") as a server descriptor we have "
+ + "in memory. Using the server descriptor from "
+ + "memory.");
descriptorLine = descriptorsReader.readLine();
- continue; // TODO is this correct?
- } else if (filePubl.compareTo(memPubl) < 0) {
+ continue;
+ } else if (fileKey.compareTo(memKey) < 0) {
descriptorsWriter.write(descriptorLine + "\n");
descriptorLine = descriptorsReader.readLine();
} else {
- String removed = this.descriptors.remove(memPubl);
+ String removed = this.descriptors.remove(memKey);
this.descById.remove(removed.split(",")[1]);
descriptorsWriter.write(removed + "\n");
}
@@ -316,8 +406,12 @@ public class ServerDescriptorStatsFileHandler {
}
}
- /* Read in all descriptors that were published in the last 24
- * hours before the consensus that we're just parsing. */
+ /* Read in all server descriptors that were published in the last
+ * 24 hours before the consensus that we're just processing. These
+ * server descriptors might be referenced from the consensus.
+ * Store references to these server descriptors by identifier to
+ * facilitate matching a consensus entry with the corresponding
+ * server descriptor. */
String validAfter = line.split(",")[0];
while (descriptorsReader != null && descriptorLine != null &&
descriptorLine.split(",")[0].compareTo(validAfter) < 0) {
@@ -328,15 +422,15 @@ public class ServerDescriptorStatsFileHandler {
}
/* Now we have a consensus line we want to parse and all possibly
- * referenced descriptors in descById (rename). Let's write some
- * stats. */
+ * referenced descriptors in descById. Let's write some stats. */
String consensusDate = line.substring(0, 10);
if (statsDate == null) {
statsDate = consensusDate;
}
if (!statsDate.equals(consensusDate)) {
- /* If we have parsed at least half of the consensuses of a day,
- * Write stats to disk. */ // TODO document this somewhere
+ /* We have finished one day of consensuses. If we have parsed at
+ * least half of the possible 24 consensuses of that day, write
+ * stats to disk. */
if (consensusesAtThisDay >= 12) {
versionWriter.write(statsDate);
for (int i = 0; i < versionStats.length; i++) {
@@ -355,12 +449,11 @@ public class ServerDescriptorStatsFileHandler {
} else {
this.logger.fine("Not enough consensuses to write to stats.");
}
- versionStats = new int[versionKeys.size() + 1];
- platformStats = new int[platformKeys.size() + 1];
- bandwidthStats = 0L;
- consensusesAtThisDay = 0;
- // fill in NA's for missing dates
+ /* Fill in NA's for missing dates. */
long writtenMillis = dateFormat.parse(statsDate).getTime();
+ if (consensusesAtThisDay < 12) {
+ writtenMillis -= 24L * 60L * 60L * 1000L;
+ }
long nextMillis = dateFormat.parse(consensusDate).getTime();
while (writtenMillis + (24L * 60L * 60L * 1000L) < nextMillis) {
writtenMillis += 24L * 60L * 60L * 1000L;
@@ -377,15 +470,20 @@ public class ServerDescriptorStatsFileHandler {
platformWriter.write(",NA\n");
bandwidthWriter.write(date + ",NA\n");
}
-
+ /* Clear counters to collect next day's statistics. */
+ versionStats = new int[this.relayVersions.size() + 1];
+ platformStats = new int[this.relayPlatforms.size() + 1];
+ bandwidthStats = 0L;
+ consensusesAtThisDay = 0;
statsDate = consensusDate;
}
- /* Parse all descriptors that are referenced from this consensus.
- * only add values if we have 90+ % of all ref. descriptors!!
- * TODO document this somewhere! */
- int[] versionStatsCons = new int[versionKeys.size() + 1];
- int[] platformStatsCons = new int[platformKeys.size() + 1];
+ /* For the given consensus, parse all referenced server
+ * descriptors to obtain statistics on versions, platforms, and
+ * advertised bandwidth. Only include these values if we have at
+ * least 90 % of all referenced server descriptors. */
+ int[] versionStatsCons = new int[this.relayVersions.size() + 1];
+ int[] platformStatsCons = new int[this.relayPlatforms.size() + 1];
long bandwidthStatsCons = 0L;
String[] ids = line.split(",");
int seenDescs = 0;
@@ -396,20 +494,16 @@ public class ServerDescriptorStatsFileHandler {
String[] parts = desc.split(",");
String version = parts[2].substring(0,
parts[2].lastIndexOf("."));
- if (versionKeys.contains(version)) {
- versionStatsCons[versionKeys.indexOf(version)]++;
+ if (this.relayVersions.contains(version)) {
+ versionStatsCons[this.relayVersions.indexOf(version)]++;
} else {
versionStatsCons[versionStatsCons.length - 1]++;
}
String platform = parts[3].toLowerCase();
boolean isOther = true;
- // TODO document that order of platform strings in config
- // matters! if there are two OS, "DragonFly" and "Dragon",
- // put "DragonFly" first! capitalization doesn't matter, but
- // is only relevant for stats file headers
- for (String p : platformKeys) {
+ for (String p : this.relayPlatforms) {
if (platform.contains(p.toLowerCase())) {
- platformStatsCons[platformKeys.indexOf(p)]++;
+ platformStatsCons[this.relayPlatforms.indexOf(p)]++;
isOther = false;
break;
}
@@ -431,37 +525,46 @@ public class ServerDescriptorStatsFileHandler {
bandwidthStats += bandwidthStatsCons;
consensusesAtThisDay++;
} else {
- this.logger.fine("not enough server descriptors for consensus, "
- + "less than 90%. not including in stats.");
+ this.logger.fine("Not enough referenced server descriptors for "
+ + "consensus with valid-after time " + line.substring(0, 19)
+ + ". Not including this consensus in the statistics.");
}
/* We're done reading one consensus. */
}
- /* Write remaining server descriptors to disk. */
+ /* We're done reading all consensuses, both from disk and from
+ * memory. Write remaining server descriptors to disk. These are the
+ * server descriptors that were published 24 hours before the last
+ * parsed consensus and those server descriptors published
+ * afterwards. */
while (descriptorLine != null || !this.descriptors.isEmpty()) {
if (descriptorLine != null) {
if (!this.descriptors.isEmpty()) {
- String filePubl = descriptorLine.substring(0, 47);
- // 47 chars: 19 for datetime, 1 for comma, 27 for descid
- String memPubl = this.descriptors.firstKey();
- if (filePubl.equals(memPubl)) {
- this.logger.finer("same desc. skipping.");
+ String fileKey = descriptorLine.substring(0, 47);
+ String memKey = this.descriptors.firstKey();
+ if (fileKey.equals(memKey)) {
+ this.logger.finer("The server descriptor we read from "
+ + "disk has the same publication time and identifier "
+ + "(" + fileKey + ") as a server descriptor we have "
+ + "in memory. Using the server descriptor from "
+ + "memory.");
descriptorLine = descriptorsReader.readLine();
- continue; // TODO is this correct?
- } else if (filePubl.compareTo(memPubl) < 0) {
+ continue;
+ } else if (fileKey.compareTo(memKey) < 0) {
descriptorsWriter.write(descriptorLine + "\n");
descriptorLine = descriptorsReader.readLine();
} else {
- descriptorsWriter.write(this.descriptors.remove(memPubl) + "\n");
+ descriptorsWriter.write(this.descriptors.remove(memKey)
+ + "\n");
}
} else {
descriptorsWriter.write(descriptorLine + "\n");
descriptorLine = descriptorsReader.readLine();
}
} else {
- descriptorsWriter.write(this.descriptors.remove(this.descriptors.firstKey())
- + "\n");
+ descriptorsWriter.write(this.descriptors.remove(
+ this.descriptors.firstKey()) + "\n");
}
}
this.descById.clear();
@@ -478,6 +581,9 @@ public class ServerDescriptorStatsFileHandler {
bandwidthWriter.close();
versionWriter.close();
platformWriter.close();
+
+ /* Delete original files and rename temporary files to be the new
+ * originals. */
if (this.consensusesFile.exists()) {
this.consensusesFile.delete();
}
@@ -488,10 +594,10 @@ public class ServerDescriptorStatsFileHandler {
this.descriptorsTempFile.renameTo(this.descriptorsFile);
/* Done. Whee! */
+ this.logger.fine("Finished writing.");
+
} catch (Exception e) {
this.logger.log(Level.WARNING, "Exception while writing files.", e);
}
- this.logger.fine("Finished writing.");
}
}
-
--
1.6.5