[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [tor/master] Fix bridge statistics.
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Sat, 12 Dec 2009 08:32:46 +0100
Subject: Fix bridge statistics.
Commit: e1e5c1b3aba96afbd6fa3b5ece262720dd45a80e
Fix statistics on client numbers by country as seen by bridges that were
broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour intervals
instead of variable 12-to-48-hour intervals.
---
ChangeLog | 3 +
doc/spec/control-spec.txt | 10 ++--
doc/spec/dir-spec.txt | 31 ++++++++-
src/or/control.c | 22 ++-----
src/or/geoip.c | 167 +++++++++++++++++++++++++++++++++++++++++++++
src/or/main.c | 27 +++++++-
src/or/or.h | 8 ++-
src/or/router.c | 48 +++----------
8 files changed, 251 insertions(+), 65 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 2cb2383..56c8958 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,9 @@ Changes in version 0.2.2.7-alpha - 2009-??-??
- Do not segfault when writing buffer stats when we haven't observed
a single circuit to report about. Found by Fabian Lanze. Bugfix on
0.2.2.1-alpha.
+ - Fix statistics on client numbers by country as seen by bridges that
+ were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour
+ intervals instead of variable 12-to-48-hour intervals.
Changes in version 0.2.2.6-alpha - 2009-11-19
diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt
index 33d4658..73f0b02 100644
--- a/doc/spec/control-spec.txt
+++ b/doc/spec/control-spec.txt
@@ -1637,11 +1637,11 @@
TimeStarted is a quoted string indicating when the reported summary
counts from (in GMT).
- The CountrySummary keyword has as its argument a comma-separated
- set of "countrycode=count" pairs. For example,
- 650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008"
- 650 CountrySummary=us=16,de=8,uk=8
-[XXX Matt Edman informs me that the time format above is wrong. -RD]
+ The CountrySummary keyword has as its argument a comma-separated,
+ possibly empty set of "countrycode=count" pairs. For example (without
+ linebreak),
+ 650-CLIENTS_SEEN TimeStarted="2008-12-25 23:50:43"
+ CountrySummary=us=16,de=8,uk=8
4.1.15. New consensus networkstatus has arrived.
diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt
index 5eedbd5..0bfc758 100644
--- a/doc/spec/dir-spec.txt
+++ b/doc/spec/dir-spec.txt
@@ -627,8 +627,8 @@
As documented in 2.1 above. See migration notes in section 2.2.1.
- "geoip-start" YYYY-MM-DD HH:MM:SS NL
- "geoip-client-origins" CC=N,CC=N,... NL
+ ("geoip-start" YYYY-MM-DD HH:MM:SS NL)
+ ("geoip-client-origins" CC=N,CC=N,... NL)
Only generated by bridge routers (see blocking.pdf), and only
when they have been configured with a geoip database.
@@ -641,6 +641,33 @@
"geoip-start" is the time at which we began collecting geoip
statistics.
+ "geoip-start" and "geoip-client-origins" have been replaced by
+ "bridge-stats-end" and "bridge-stats-ips" in 0.2.2.4-alpha. The
+ reason is that the measurement interval with "geoip-stats" as
+ determined by subtracting "geoip-start" from "published" could
+ have had a variable length, whereas the measurement interval in
+ 0.2.2.4-alpha and later is set to be exactly 24 hours long. In
+ order to clearly distinguish the new measurement intervals from
+ the old ones, the new keywords have been introduced.
+
+ "bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+ [At most once.]
+
+ YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+ interval of length NSEC seconds (86400 seconds by default).
+
+ A "bridge-stats-end" line, as well as any other "bridge-*" line,
+ is only added when the relay has been running as a bridge for at
+ least 24 hours.
+
+ "bridge-ips" CC=N,CC=N,... NL
+ [At most once.]
+
+ List of mappings from two-letter country codes to the number of
+ unique IP addresses that have connected from that country to the
+ bridge and which are no known relays, rounded up to the nearest
+ multiple of 8.
+
"dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
[At most once.]
diff --git a/src/or/control.c b/src/or/control.c
index c3567bd..d3ed858 100644
--- a/src/or/control.c
+++ b/src/or/control.c
@@ -1755,21 +1755,10 @@ getinfo_helper_events(control_connection_t *control_conn,
"information", question);
}
} else if (!strcmp(question, "status/clients-seen")) {
- char geoip_start[ISO_TIME_LEN+1];
- size_t answer_len;
- char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
-
- if (!geoip_summary)
+ char *bridge_stats = geoip_get_bridge_stats_controller(time(NULL));
+ if (!bridge_stats)
return -1;
-
- answer_len = strlen("TimeStarted=\"\" CountrySummary=") +
- ISO_TIME_LEN + strlen(geoip_summary) + 1;
- *answer = tor_malloc(answer_len);
- format_iso_time(geoip_start, geoip_get_history_start());
- tor_snprintf(*answer, answer_len,
- "TimeStarted=\"%s\" CountrySummary=%s",
- geoip_start, geoip_summary);
- tor_free(geoip_summary);
+ *answer = bridge_stats;
} else {
return 0;
}
@@ -3846,10 +3835,9 @@ control_event_bootstrap_problem(const char *warn, int reason)
* from recently. Send a copy to the controller in case it wants to
* display it for the user. */
void
-control_event_clients_seen(const char *timestarted, const char *countries)
+control_event_clients_seen(const char *controller_str)
{
send_control_event(EVENT_CLIENTS_SEEN, 0,
- "650 CLIENTS_SEEN TimeStarted=\"%s\" CountrySummary=%s\r\n",
- timestarted, countries);
+ "650 CLIENTS_SEEN %s\r\n", controller_str);
}
diff --git a/src/or/geoip.c b/src/or/geoip.c
index a57e1fb..a43daaf 100644
--- a/src/or/geoip.c
+++ b/src/or/geoip.c
@@ -1081,6 +1081,173 @@ geoip_dirreq_stats_write(time_t now)
tor_free(data_v3);
}
+/** Start time of bridge stats. */
+static time_t start_of_bridge_stats_interval;
+
+/** Initialize bridge stats. */
+void
+geoip_bridge_stats_init(time_t now)
+{
+ start_of_bridge_stats_interval = now;
+}
+
+/** Parse the bridge statistics as they are written to extra-info
+ * descriptors for being returned to controller clients. Return the
+ * controller string if successful, or NULL otherwise. */
+static char *
+parse_bridge_stats_controller(const char *stats_str, time_t now)
+{
+ char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1],
+ *controller_str, *eos;
+ const char *stats_end_line = "bridge-stats-end",
+ *ips_line = "bridge-ips", *tmp;
+ time_t stats_end_time;
+ size_t controller_len;
+ int seconds;
+ tor_assert(stats_str);
+
+ /* Parse timestamp and number of seconds from
+ "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */
+ tmp = strstr(stats_str, stats_end_line);
+ if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6)
+ return NULL;
+ strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1,
+ sizeof(stats_end_str));
+ if (parse_iso_time(stats_end_str, &stats_end_time) < 0)
+ return NULL;
+ if (stats_end_time < now - (25*60*60) ||
+ stats_end_time > now + (1*60*60))
+ return NULL;
+ seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 +
+ ISO_TIME_LEN + 2, &eos, 10);
+ if (!eos || seconds < 23*60*60)
+ return NULL;
+ format_iso_time(stats_start_str, stats_end_time - seconds);
+
+ /* Parse: "bridge-ips CC=N,CC=N,..." */
+ tmp = strstr(tmp, ips_line);
+ if (!tmp || strlen(tmp) < strlen(ips_line))
+ return NULL;
+ if (strlen(tmp) > strlen(ips_line) + 2)
+ tmp += strlen(ips_line) + 1;
+ else
+ tmp = "";
+ controller_len = strlen("TimeStarted=\"\" CountrySummary=") +
+ ISO_TIME_LEN + strlen(tmp) + 1;
+ controller_str = tor_malloc(controller_len);
+ if (tor_snprintf(controller_str, controller_len,
+ "TimeStarted=\"%s\" CountrySummary=%s",
+ stats_start_str, tmp) < 0) {
+ tor_free(controller_str);
+ return NULL;
+ }
+ return controller_str;
+}
+
+/** Most recent bridge statistics formatted to be written to extra-info
+ * descriptors. */
+static char *bridge_stats_extrainfo = NULL;
+
+/** Most recent bridge statistics formatted to be returned to controller
+ * clients. */
+static char *bridge_stats_controller = NULL;
+
+/** Write bridge statistics to $DATADIR/stats/bridge-stats and return
+ * when we should next try to write statistics. */
+time_t
+geoip_bridge_stats_write(time_t now)
+{
+ char *statsdir = NULL, *filename = NULL, *data = NULL,
+ written[ISO_TIME_LEN+1], *out = NULL, *controller_str;
+ size_t len;
+
+ /* If we changed from relay to bridge recently, adapt starting time
+ * of current measurements. */
+ if (start_of_bridge_stats_interval < client_history_starts)
+ start_of_bridge_stats_interval = client_history_starts;
+
+ /* Check if 24 hours have passed since starting measurements. */
+ if (now < start_of_bridge_stats_interval +
+ DIR_ENTRY_RECORD_USAGE_RETAIN_IPS)
+ return start_of_bridge_stats_interval +
+ DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
+
+ /* Discard all items in the client history that are too old. */
+ geoip_remove_old_clients(start_of_bridge_stats_interval);
+
+ statsdir = get_datadir_fname("stats");
+ if (check_private_dir(statsdir, CPD_CREATE) < 0)
+ goto done;
+ filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
+ data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
+ format_iso_time(written, now);
+ len = strlen("bridge-stats-end (999999 s)\nbridge-ips \n") +
+ ISO_TIME_LEN + (data ? strlen(data) : 0) + 1;
+ out = tor_malloc(len);
+ if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n",
+ written, (unsigned) (now - start_of_bridge_stats_interval),
+ data ? data : "") < 0)
+ goto done;
+ write_str_to_file(filename, out, 0);
+ controller_str = parse_bridge_stats_controller(out, now);
+ if (!controller_str)
+ goto done;
+ start_of_bridge_stats_interval = now;
+ tor_free(bridge_stats_extrainfo);
+ tor_free(bridge_stats_controller);
+ bridge_stats_extrainfo = out;
+ out = NULL;
+ bridge_stats_controller = controller_str;
+ control_event_clients_seen(controller_str);
+ done:
+ tor_free(filename);
+ tor_free(statsdir);
+ tor_free(data);
+ tor_free(out);
+ return start_of_bridge_stats_interval +
+ DIR_ENTRY_RECORD_USAGE_RETAIN_IPS;
+}
+
+/** Try to load the most recent bridge statistics from disk, unless we
+ * have finished a measurement interval lately. */
+static void
+load_bridge_stats(time_t now)
+{
+ char *fname, *contents, *controller_str;
+ if (bridge_stats_extrainfo)
+ return;
+ fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats");
+ contents = read_file_to_str(fname, 0, NULL);
+ if (contents) {
+ controller_str = parse_bridge_stats_controller(contents, now);
+ if (controller_str) {
+ bridge_stats_extrainfo = contents;
+ bridge_stats_controller = controller_str;
+ } else {
+ tor_free(contents);
+ }
+ }
+ tor_free(fname);
+}
+
+/** Return most recent bridge statistics for inclusion in extra-info
+ * descriptors, or NULL if we don't have recent bridge statistics. */
+char *
+geoip_get_bridge_stats_extrainfo(time_t now)
+{
+ load_bridge_stats(now);
+ return bridge_stats_extrainfo;
+}
+
+/** Return most recent bridge statistics to be returned to controller
+ * clients, or NULL if we don't have recent bridge statistics. */
+char *
+geoip_get_bridge_stats_controller(time_t now)
+{
+ load_bridge_stats(now);
+ return bridge_stats_controller;
+}
+
/** Start time of entry stats. */
static time_t start_of_entry_stats_interval;
diff --git a/src/or/main.c b/src/or/main.c
index 4dc182e..101c084 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -832,6 +832,8 @@ run_scheduled_events(time_t now)
static time_t time_to_recheck_bandwidth = 0;
static time_t time_to_check_for_expired_networkstatus = 0;
static time_t time_to_write_stats_files = 0;
+ static time_t time_to_write_bridge_stats = 0;
+ static int should_init_bridge_stats = 1;
static time_t time_to_retry_dns_init = 0;
or_options_t *options = get_options();
int i;
@@ -966,7 +968,8 @@ run_scheduled_events(time_t now)
if (options->CellStatistics || options->DirReqStatistics ||
options->EntryStatistics || options->ExitPortStatistics) {
if (!time_to_write_stats_files) {
- /* Initialize stats. */
+ /* Initialize stats. We're doing this here and not in options_act,
+ * so that we know exactly when the 24 hours interval ends. */
if (options->CellStatistics)
rep_hist_buffer_stats_init(now);
if (options->DirReqStatistics)
@@ -998,6 +1001,28 @@ run_scheduled_events(time_t now)
}
}
+ /* 1h. Check whether we should write bridge statistics to disk.
+ */
+ if (should_record_bridge_info(options)) {
+ if (time_to_write_bridge_stats < now) {
+ if (should_init_bridge_stats) {
+ /* (Re-)initialize bridge statistics. */
+ geoip_bridge_stats_init(now);
+ time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL;
+ should_init_bridge_stats = 0;
+ } else {
+ /* Possibly write bridge statistics to disk and ask when to write
+ * them next time. */
+ time_to_write_bridge_stats = geoip_bridge_stats_write(
+ time_to_write_bridge_stats);
+ }
+ }
+ } else if (!should_init_bridge_stats) {
+ /* Bridge mode was turned off. Ensure that stats are re-initialized
+ * next time bridge mode is turned on. */
+ should_init_bridge_stats = 1;
+ }
+
/* Remove old information from rephist and the rend cache. */
if (time_to_clean_caches < now) {
rep_history_clean(now - options->RephistTrackTime);
diff --git a/src/or/or.h b/src/or/or.h
index 736e66a..85f549f 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3630,8 +3630,7 @@ typedef enum {
void control_event_bootstrap(bootstrap_status_t status, int progress);
void control_event_bootstrap_problem(const char *warn, int reason);
-void control_event_clients_seen(const char *timestarted,
- const char *countries);
+void control_event_clients_seen(const char *controller_str);
#ifdef CONTROL_PRIVATE
/* Used only by control.c and test.c */
@@ -4089,6 +4088,10 @@ void geoip_dirreq_stats_init(time_t now);
void geoip_dirreq_stats_write(time_t now);
void geoip_entry_stats_init(time_t now);
void geoip_entry_stats_write(time_t now);
+void geoip_bridge_stats_init(time_t now);
+time_t geoip_bridge_stats_write(time_t now);
+char *geoip_get_bridge_stats_extrainfo(time_t);
+char *geoip_get_bridge_stats_controller(time_t);
/********************************* hibernate.c **********************/
@@ -4749,7 +4752,6 @@ int router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router,
crypto_pk_env_t *ident_key);
int extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
crypto_pk_env_t *ident_key);
-char *extrainfo_get_client_geoip_summary(time_t);
int is_legal_nickname(const char *s);
int is_legal_nickname_or_hexdigest(const char *s);
int is_legal_hexdigest(const char *s);
diff --git a/src/or/router.c b/src/or/router.c
index 8b6d1d9..7362e60 100644
--- a/src/or/router.c
+++ b/src/or/router.c
@@ -1882,6 +1882,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
int result;
size_t len;
static int write_stats_to_extrainfo = 1;
+ time_t now = time(NULL);
base16_encode(identity, sizeof(identity),
extrainfo->cache_info.identity_digest, DIGEST_LEN);
@@ -1896,7 +1897,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
if (options->ExtraInfoStatistics && write_stats_to_extrainfo) {
char *contents = NULL;
- time_t now = time(NULL);
log_info(LD_GENERAL, "Adding stats to extra-info descriptor.");
if (options->DirReqStatistics &&
load_stats_file("stats"PATH_SEPARATOR"dirreq-stats",
@@ -1953,18 +1953,16 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
return -1;
if (should_record_bridge_info(options)) {
- char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL));
- if (geoip_summary) {
- char geoip_start[ISO_TIME_LEN+1];
- format_iso_time(geoip_start, geoip_get_history_start());
- result = tor_snprintf(s+strlen(s), maxlen-strlen(s),
- "geoip-start-time %s\n"
- "geoip-client-origins %s\n",
- geoip_start, geoip_summary);
- control_event_clients_seen(geoip_start, geoip_summary);
- tor_free(geoip_summary);
- if (result<0)
- return -1;
+ char *bridge_stats = geoip_get_bridge_stats_extrainfo(now);
+ if (bridge_stats) {
+ size_t pos = strlen(s);
+ if (strlcpy(s + pos, bridge_stats, maxlen - strlen(s)) !=
+ strlen(bridge_stats)) {
+ log_warn(LD_DIR, "Could not write bridge-stats to extra-info "
+ "descriptor.");
+ s[pos] = '\0';
+ }
+ tor_free(bridge_stats);
}
}
@@ -2013,30 +2011,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
return (int)strlen(s)+1;
}
-/** Wrapper function for geoip_get_client_history(). It first discards
- * any items in the client history that are too old -- it dumps anything
- * more than 48 hours old, but it only considers whether to dump at most
- * once per 48 hours, so we aren't too precise to an observer (see also
- * r14780).
- */
-char *
-extrainfo_get_client_geoip_summary(time_t now)
-{
- static time_t last_purged_at = 0;
- int geoip_purge_interval =
- (get_options()->DirReqStatistics || get_options()->EntryStatistics) ?
- DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60;
- if (now > last_purged_at+geoip_purge_interval) {
- /* (Note that this also discards items in the client history with
- * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
- * because bridge and directory stats are independent. Keep in mind
- * for future extensions, though.) */
- geoip_remove_old_clients(now-geoip_purge_interval);
- last_purged_at = now;
- }
- return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
-}
-
/** Return true iff <b>s</b> is a legally valid server nickname. */
int
is_legal_nickname(const char *s)
--
1.5.6.5