[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [ernie/master] Add detailed documentation to one of the R scripts.
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Sat, 5 Jun 2010 11:43:29 +0200
Subject: Add detailed documentation to one of the R scripts.
Commit: d790e9b4620969e6549a1098ef8dd3500aad724d
---
R/descriptor-stats.R | 78 ++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 73 insertions(+), 5 deletions(-)
diff --git a/R/descriptor-stats.R b/R/descriptor-stats.R
index 9cb25a8..3ee5d86 100644
--- a/R/descriptor-stats.R
+++ b/R/descriptor-stats.R
@@ -1,22 +1,74 @@
+# R script to plot relay versions, platforms, and advertised bandwidth.
+# Run from ERNIE's base directory as "R --slave < R/descriptor-stats.R".
+
+# Suppress all warnings, so that only errors are written to stderr. This
+# is useful when executing this script from cron and having it mail out a
+# notification only when there's an actual problem.
options(warn = -1)
+
+# Import library ggplot2 that is used for plotting. Suppress package
+# startup messages for the same reason as suppressing warnings.
suppressPackageStartupMessages(library("ggplot2"))
+# Define a function to plot relay versions. Right now, there are no
+# parameters for this function. In the future, a possible parameter would
+# be the time interval to be plotted on the x axis.
plot_versions <- function() {
+
+ # Transform data frame versions into a data frame that can be processed
+ # by ggplot2. In particular, versions has one row per date and multiple
+ # columns for the number of relays running a particular Tor version at
+ # that date. What we need for plotting is a single data point per row
+ # with additional columns for classification, e.g., which version this
+ # data point belongs to. Add commands "print(versions)" and "print(v)"
+ # for an example.
v <- melt(versions, id = "date")
- ggplot(v, aes(x = date, y = value, colour = variable)) +
+
+ # Start plotting the data in data frame v.
+ ggplot(v,
+
+ # Tell ggplot2 how to understand the data in data frame v. The date
+ # shall be plotted on the x axis, the value on the y axis, and the
+ # column called variable shall be used to distinguish data sets by color.
+ aes(x = date, y = value, colour = variable)) +
+
+ # So far, ggplot2 only knows how to understand the data, but not how
+ # to visualize them. Draw a line from the data with line size 1.
geom_line(size = 1) +
- scale_x_date(name = "") + scale_y_continuous(name = "",
+
+ # Override the default x axis which would display a label "date" with
+ # an x axis that has no label. This line can be commented out.
+ scale_x_date(name = "") +
+
+ # Override the default y axis with label "value" with one that has no
+ # label and that starts at the origin. Note that the max() function is
+ # told to remove NA values. These lines can be commented out.
+ scale_y_continuous(name = "",
limits = c(0, max(v$value, na.rm = TRUE))) +
+
+ # Override the categorization by relay version to use a different
+ # color scheme (brewer instead of hue), have a different legend title
+ # ("Tor version" instead of "variable") and display custom legend
+ # labels ("0.2.2" instead of "X0.2.2"). These lines can be commented
+ # out.
scale_colour_brewer(name = "Tor version",
breaks = rev(names(versions)[2:length(names(versions))]),
labels = c("other",
substr(rev(names(versions)[2:(length(names(versions)) - 1)]),
2, 6))) +
+
+ # Add a graph title. This line can be commented out together with the
+ # '+' character in the last non-comment line.
opts(title = "Relay versions\n")
+
+ # Save the generated graph to the following path with given width,
+ # height, and resolution.
ggsave(filename = "website/graphs/descriptors/versions.png",
- width = 8, height = 5, dpi = 72)
+ width = 8, height = 5, dpi = 72)
}
+# Define a function to plot relay platforms. See the similar function
+# plot_versions() for details.
plot_platforms <- function() {
p <- melt(platforms, id = "date")
ggplot(p, aes(x = date, y = value, colour = variable)) +
@@ -28,9 +80,11 @@ plot_platforms <- function() {
labels = rev(names(platforms)[2:length(names(platforms))])) +
opts(title = "Relay platforms\n")
ggsave(filename = "website/graphs/descriptors/platforms.png",
- width = 8, height = 5, dpi = 72)
+ width = 8, height = 5, dpi = 72)
}
+# Define a function to plot advertised bandwidth. See the similar function
+# plot_versions() for details.
plot_bandwidth <- function() {
ggplot(bandwidth, aes(x = date, y = advbw / 1024)) + geom_line() +
scale_x_date(name = "") +
@@ -38,17 +92,29 @@ plot_bandwidth <- function() {
limits = c(0, max(bandwidth$advbw / 1024, na.rm = TRUE))) +
opts(title = "Total advertised bandwidth\n")
ggsave(filename = "website/graphs/descriptors/bandwidth.png",
- width = 8, height = 5, dpi = 72)
+ width = 8, height = 5, dpi = 72)
}
+# If a CSV file with version data exists, ...
if (file.exists("stats/version-stats")) {
+
+ # Read in the file, declare that the first line has the column names,
+ # and define the type of the first column as Date.
versions <- read.csv("stats/version-stats", header = TRUE,
colClasses = c(date = "Date"))
+
+ # Write the same data to disk without putting in quotes around strings
+ # and without adding row numbers. This file can be downloaded by others
+ # to run their own evaluations.
write.csv(versions, "website/csv/versions.csv", quote = FALSE,
row.names = FALSE)
+
+ # Call the function defined above to plot relay versions.
plot_versions()
}
+# If a CSV file with platform data exists, read it, copy it to the
+# website, and plot a platform graph.
if (file.exists("stats/platform-stats")) {
platforms <- read.csv("stats/platform-stats", header = TRUE,
colClasses = c(date = "Date"))
@@ -57,6 +123,8 @@ if (file.exists("stats/platform-stats")) {
plot_platforms()
}
+# If a CSV file with bandwidth data exists, read it, copy it to the
+# website, and plot a bandwidth graph.
if (file.exists("stats/bandwidth-stats")) {
bandwidth <- read.csv("stats/bandwidth-stats", header = TRUE,
colClasses = c(date = "Date"))
--
1.6.5