[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-web/master] Split up clients.csv for faster graphs.
commit a91f2dc9f51c2bd0e7c20e13a84c66dcb60ccd3d
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Oct 26 15:23:25 2016 +0200
Split up clients.csv for faster graphs.
Most client graphs (except for clients by country and transport) use
the same clients.csv file as input. That file has grown to 26M by
now, and it seems wasteful to read numbers for clients connecting to
bridges when graphing clients connecting to relays, and vice versa.
Split up clients.csv into clients-relay.csv and clients-bridge.csv,
and take out the node column.
Performance gain is 1.3 seconds for updating graphs on directly
connecting clients and 2.0 seconds for graphs showing clients
connecting via bridges.
---
modules/clients/split-clients.R | 6 ++++++
shared/bin/80-run-clients-stats.sh | 5 ++++-
shared/bin/99-copy-stats-files.sh | 2 +-
website/rserve/graphs.R | 26 ++++++++++++--------------
4 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/modules/clients/split-clients.R b/modules/clients/split-clients.R
new file mode 100644
index 0000000..5f3cb74
--- /dev/null
+++ b/modules/clients/split-clients.R
@@ -0,0 +1,6 @@
+u <- read.csv("clients.csv", stringsAsFactors = FALSE)
+write.csv(u[u$node == 'relay', names(u) != "node"], 'clients-relay.csv',
+ quote = FALSE, row.names = FALSE, na = '')
+write.csv(u[u$node == 'bridge', names(u) != "node"], 'clients-bridge.csv',
+ quote = FALSE, row.names = FALSE, na = '')
+
diff --git a/shared/bin/80-run-clients-stats.sh b/shared/bin/80-run-clients-stats.sh
index b296c37..a3efbe3 100755
--- a/shared/bin/80-run-clients-stats.sh
+++ b/shared/bin/80-run-clients-stats.sh
@@ -21,8 +21,11 @@ python detector.py
echo `date` "Merging censorship detector results."
R --slave -f merge-clients.R > /dev/null 2>&1
+
+echo `date` "Splitting results file."
+R --slave -f split-clients.R > /dev/null 2>&1
mkdir -p stats/
-cp clients.csv stats/
+cp clients*.csv stats/
cp userstats-combined.csv stats/
echo `date` "Terminating."
diff --git a/shared/bin/99-copy-stats-files.sh b/shared/bin/99-copy-stats-files.sh
index 504216a..6daf22b 100755
--- a/shared/bin/99-copy-stats-files.sh
+++ b/shared/bin/99-copy-stats-files.sh
@@ -4,6 +4,6 @@ cp -a modules/legacy/stats/*.csv shared/stats/
cp -a modules/connbidirect/stats/connbidirect2.csv shared/stats/
cp -a modules/advbwdist/stats/advbwdist.csv shared/stats/
cp -a modules/hidserv/stats/hidserv.csv shared/stats/
-cp -a modules/clients/stats/clients.csv shared/stats/
+cp -a modules/clients/stats/clients*.csv shared/stats/
cp -a modules/clients/stats/userstats-combined.csv shared/stats/
diff --git a/website/rserve/graphs.R b/website/rserve/graphs.R
index e3ccb06..6f7e119 100644
--- a/website/rserve/graphs.R
+++ b/website/rserve/graphs.R
@@ -766,21 +766,21 @@ plot_userstats <- function(start, end, node, variable, value, events,
path) {
end <- min(end, as.character(Sys.Date() - 2))
c <- read.csv(paste("/srv/metrics.torproject.org/metrics/shared/stats/",
- "clients.csv", sep = ""), stringsAsFactors = FALSE)
+ "clients-", node, ".csv", sep = ""),
+ stringsAsFactors = FALSE)
u <- c[c$date >= start & c$date <= end, ]
- u <- rbind(u, data.frame(date = start, node = node,
+ u <- rbind(u, data.frame(date = start,
country = ifelse(variable == 'country' & value != 'all', value, ''),
transport = ifelse(variable == 'transport', value, ''),
version = ifelse(variable == 'version', value, ''),
lower = 0, upper = 0, clients = 0, frac = 0))
if (node == 'relay') {
if (value != 'all') {
- u <- u[u$country == value & u$node == 'relay', ]
+ u <- u[u$country == value, ]
title <- paste("Directly connecting users from ",
countryname(value), "\n", sep = "")
} else {
- u <- u[u$country == '' & u$transport == '' & u$version == '' &
- u$node == 'relay', ]
+ u <- u[u$country == '', ]
title <- "Directly connecting users\n"
}
u <- aggregate(list(lower = u$lower, upper = u$upper,
@@ -790,20 +790,19 @@ plot_userstats <- function(start, end, node, variable, value, events,
FUN = sum)
} else if (variable == 'transport') {
if ('!<OR>' %in% value) {
- n <- u[u$transport != '' & u$transport != '<OR>' &
- u$node == 'bridge', ]
+ n <- u[u$transport != '' & u$transport != '<OR>', ]
n <- aggregate(list(lower = n$lower, upper = n$upper,
clients = n$clients),
by = list(date = n$date),
FUN = sum)
- u <- rbind(u, data.frame(date = n$date, node = 'bridge',
+ u <- rbind(u, data.frame(date = n$date,
country = '', transport = '!<OR>',
version = '', lower = n$lower,
upper = n$upper, clients = n$clients,
frac = NA))
}
if (length(value) > 1) {
- u <- u[u$transport %in% value & u$node == 'bridge', ]
+ u <- u[u$transport %in% value, ]
u <- aggregate(list(lower = u$lower, upper = u$upper,
users = u$clients),
by = list(date = as.Date(u$date, "%Y-%m-%d"),
@@ -811,7 +810,7 @@ plot_userstats <- function(start, end, node, variable, value, events,
FUN = sum)
title <- paste("Bridge users by transport\n")
} else {
- u <- u[u$transport == value & u$node == 'bridge', ]
+ u <- u[u$transport == value, ]
u <- aggregate(list(lower = u$lower, upper = u$upper,
users = u$clients),
by = list(date = as.Date(u$date, "%Y-%m-%d"),
@@ -826,7 +825,7 @@ plot_userstats <- function(start, end, node, variable, value, events,
paste('transport', value)))))), "\n", sep = "")
}
} else if (variable == 'version') {
- u <- u[u$version == value & u$node == 'bridge', ]
+ u <- u[u$version == value, ]
title <- paste("Bridge users using IP", value, "\n", sep = "")
u <- aggregate(list(lower = u$lower, upper = u$upper,
users = u$clients),
@@ -835,12 +834,11 @@ plot_userstats <- function(start, end, node, variable, value, events,
FUN = sum)
} else {
if (value != 'all') {
- u <- u[u$country == value & u$node == 'bridge', ]
+ u <- u[u$country == value, ]
title <- paste("Bridge users from ", countryname(value),
"\n", sep = "")
} else {
- u <- u[u$country == '' & u$transport == '' & u$version == '' &
- u$node == 'bridge', ]
+ u <- u[u$country == '' & u$transport == '' & u$version == '', ]
title <- "Bridge users\n"
}
u <- aggregate(list(lower = u$lower, upper = u$upper,
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits