[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [tor/master] Some tweaks to statistics.



Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed, 15 Jul 2009 16:32:40 +0200
Subject: Some tweaks to statistics.
Commit: 8c496d1660c326c0bc2bd5c505255d5ec3b653ec

Changes to directory request statistics:

- Rename GEOIP statistics to DIRREQ statistics, because they now include
  more than just GeoIP-based statistics, and because other statistics
  depend on GeoIP data, too.
- Rename output file from geoip-stats to dirreq-stats.
- Add a new config option, DirReqStatistics, that must be set in order to
  measure directory request statistics.
- Clean up ChangeLog.

Also ensure that entry guard statistics have access to a local GeoIP
database.
---
 ChangeLog                |   14 ++++++++------
 configure.in             |    8 ++++----
 src/or/config.c          |   34 ++++++++++++++++++++++++----------
 src/or/connection.c      |    2 +-
 src/or/connection_edge.c |    4 ++--
 src/or/directory.c       |    4 ++--
 src/or/geoip.c           |   40 ++++++++++++++++++++++++++--------------
 src/or/or.h              |   14 +++++++++-----
 src/or/relay.c           |    6 +++---
 src/or/router.c          |    2 +-
 10 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6b4f651..e8b5000 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,12 +15,14 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
     - The memarea code now uses a sentinel value at the end of each area
       to make sure nothing writes beyond the end of an area.  This might
       help debug some conceivable causes of bug 930.
-    - Directories that are configured with the --enable-geoip-stats flag
-      now write their GeoIP stats to disk exactly every 24 hours.
-      Estimated shares of v2 and v3 requests are determined as averages,
-      not at the end of a measurement period. Also, unresolved requests
-      are listed with country code '??'.
-      Directories now also measure download times of network statuses.
+    - Directories that are configured with the --enable-dirreq-stats flag
+      and have "DirReqStatistics 1" set write directory request stats to
+      disk every 24 hours. As compared to the --enable-geoip-stats flag
+      in 0.2.1.x, there are a few improvements: 1) stats are written to
+      disk exactly every 24 hours; 2) estimated shares of v2 and v3
+      requests are determined as mean values, not at the end of a
+      measurement period; 3) unresolved requests are listed with country
+      code '??'; 4) directories also measure download times.
     - Exit nodes can write statistics on the number of exit streams and
       transferred bytes per port to disk every 24 hours.  To enable this,
       run configure with the --enable-exit-stats option, and set
diff --git a/configure.in b/configure.in
index 0f6ed80..ab7d648 100644
--- a/configure.in
+++ b/configure.in
@@ -92,11 +92,11 @@ if test "$enable_exit_stats" = "yes"; then
   AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
 fi
 
-AC_ARG_ENABLE(geoip-stats,
-     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+AC_ARG_ENABLE(dirreq-stats,
+     AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics))
 
-if test "$enable_geoip_stats" = "yes"; then
-  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+if test "$enable_dirreq_stats" = "yes"; then
+  AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics])
 fi
 
 AC_ARG_ENABLE(buffer-stats,
diff --git a/src/or/config.c b/src/or/config.c
index 087a907..1811551 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -187,12 +187,13 @@ static config_var_t _option_vars[] = {
   V(DirPort,                     UINT,     "0"),
   V(DirPortFrontPage,            FILENAME, NULL),
   OBSOLETE("DirPostPeriod"),
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   OBSOLETE("DirRecordUsageByCountry"),
   OBSOLETE("DirRecordUsageGranularity"),
   OBSOLETE("DirRecordUsageRetainIPs"),
   OBSOLETE("DirRecordUsageSaveInterval"),
 #endif
+  V(DirReqStatistics,            BOOL,     "0"),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),
@@ -1376,17 +1377,25 @@ options_act(or_options_t *old_options)
     geoip_load_file(actual_fname, options);
     tor_free(actual_fname);
   }
-#ifdef ENABLE_GEOIP_STATS
-  /* Check if GeoIP database could be loaded. */
-  if (!geoip_is_loaded()) {
-    log_warn(LD_CONFIG, "Configured to measure GeoIP statistics, but no "
-                        "GeoIP database found!");
-    return -1;
+
+#ifdef ENABLE_DIRREQ_STATS
+  if (options->DirReqStatistics) {
+    /* Check if GeoIP database could be loaded. */
+    if (!geoip_is_loaded()) {
+      log_warn(LD_CONFIG, "Configured to measure directory request "
+               "statistics, but no GeoIP database found!");
+      return -1;
+    }
+    log_notice(LD_CONFIG, "Configured to count directory requests by "
+               "country and write aggregate statistics to disk. Check the "
+               "dirreq-stats file in your data directory that will first "
+               "be written in 24 hours from now.");
   }
-  log_notice(LD_CONFIG, "Configured to measure usage by country and "
-    "write aggregate statistics to disk. Check the geoip-stats file "
-    "in your data directory once I've been running for 24 hours.");
+#else
+  log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built "
+           "without support for directory request statistics.");
 #endif
+
 #ifdef ENABLE_EXIT_STATS
   if (options->ExitPortStatistics)
     log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
@@ -1417,6 +1426,11 @@ options_act(or_options_t *old_options)
       log_warn(LD_CONFIG, "Bridges cannot be configured to measure "
                "additional GeoIP statistics as entry guards.");
       return -1;
+    } else if (!geoip_is_loaded()) {
+      /* Check if GeoIP database could be loaded. */
+      log_warn(LD_CONFIG, "Configured to measure entry node statistics, "
+               "but no GeoIP database found!");
+      return -1;
     } else
       log_notice(LD_CONFIG, "Configured to measure entry node "
                  "statistics. Look for the entry-stats file that will "
diff --git a/src/or/connection.c b/src/or/connection.c
index 242a32c..dc9c4ea 100644
--- a/src/or/connection.c
+++ b/src/or/connection.c
@@ -2302,7 +2302,7 @@ connection_handle_write(connection_t *conn, int force)
     /* else open, or closing */
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
                            max_to_write, &conn->outbuf_flushlen);
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     /* If we just flushed the last bytes, check if this tunneled dir
      * request is done. */
     if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id)
diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c
index f2b499f..7a3d0a5 100644
--- a/src/or/connection_edge.c
+++ b/src/or/connection_edge.c
@@ -2551,7 +2551,7 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ)
 
   log_debug(LD_EXIT,"Creating new exit connection.");
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /* Remember the tunneled request ID in the new edge connection, so that
    * we can measure download times. */
   TO_CONN(n_stream)->dirreq_id = circ->dirreq_id;
@@ -2792,7 +2792,7 @@ connection_exit_connect_dir(edge_connection_t *exitconn)
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /* Note that the new dir conn belongs to the same tunneled request as
    * the edge conn, so that we can measure download times. */
   TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id;
diff --git a/src/or/directory.c b/src/or/directory.c
index c6faeae..976c08c 100644
--- a/src/or/directory.c
+++ b/src/or/directory.c
@@ -2562,7 +2562,7 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
     }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     {
       struct in_addr in;
       if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
@@ -3210,7 +3210,7 @@ connection_dir_finished_flushing(dir_connection_t *conn)
   tor_assert(conn);
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /* Note that we have finished writing the directory response. For direct
    * connections this means we're done, for tunneled connections its only
    * an intermediate step. */
diff --git a/src/or/geoip.c b/src/or/geoip.c
index 0ecc466..7aeec8f 100644
--- a/src/or/geoip.c
+++ b/src/or/geoip.c
@@ -347,7 +347,7 @@ geoip_determine_shares(time_t now)
   last_time_determined_shares = now;
 }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
  * have been sent to us since the last call of this function up to time
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
@@ -390,10 +390,11 @@ geoip_note_client_seen(geoip_client_action_t action,
     if (client_history_starts > now)
       return;
   } else {
-#ifndef ENABLE_GEOIP_STATS
+#ifndef ENABLE_DIRREQ_STATS
     return;
 #else
-    if (options->BridgeRelay || options->BridgeAuthoritativeDir)
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirReqStatistics)
       return;
 #endif
   }
@@ -494,7 +495,7 @@ geoip_remove_old_clients(time_t cutoff)
     client_history_starts = cutoff;
 }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** How many responses are we giving to clients requesting v2 network
  * statuses? */
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
@@ -511,8 +512,10 @@ void
 geoip_note_ns_response(geoip_client_action_t action,
                        geoip_ns_response_t response)
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   static int arrays_initialized = 0;
+  if (!get_options()->DirReqStatistics)
+    return;
   if (!arrays_initialized) {
     memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
     memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
@@ -649,7 +652,10 @@ void
 geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
                    geoip_client_action_t action, dirreq_type_t type)
 {
-  dirreq_map_entry_t *ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
+  dirreq_map_entry_t *ent;
+  if (!get_options()->DirReqStatistics)
+    return;
+  ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
   ent->dirreq_id = dirreq_id;
   tor_gettimeofday(&ent->request_time);
   ent->response_size = response_size;
@@ -668,7 +674,10 @@ void
 geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
                           dirreq_state_t new_state)
 {
-  dirreq_map_entry_t *ent = _dirreq_map_get(type, dirreq_id);
+  dirreq_map_entry_t *ent;
+  if (!get_options()->DirReqStatistics)
+    return;
+  ent = _dirreq_map_get(type, dirreq_id);
   if (!ent)
     return;
   if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
@@ -685,7 +694,7 @@ geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
   }
 }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** Return a newly allocated comma-separated string containing statistics
  * on network status downloads. The string contains the number of completed
  * requests, timeouts, and still running requests as well as the download
@@ -788,7 +797,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
   char *result = NULL;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
   if (!geoip_is_loaded())
@@ -803,7 +812,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     unsigned granularity = IP_GRANULARITY;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     granularity = DIR_RECORD_USAGE_GRANULARITY;
 #endif
     HT_FOREACH(ent, clientmap, &client_history) {
@@ -871,7 +880,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   char *result;
   unsigned granularity = IP_GRANULARITY;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   granularity = DIR_RECORD_USAGE_GRANULARITY;
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
@@ -916,14 +925,14 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   return result;
 }
 
-/** Store all our geoip statistics into $DATADIR/geoip-stats. */
+/** Store all our geoip statistics into $DATADIR/dirreq-stats. */
 static void
 dump_geoip_stats(void)
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   time_t now = time(NULL);
   time_t request_start;
-  char *filename = get_datadir_fname("geoip-stats");
+  char *filename = get_datadir_fname("dirreq-stats");
   char *data_v2 = NULL, *data_v3 = NULL;
   char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
@@ -931,6 +940,9 @@ dump_geoip_stats(void)
   FILE *out;
   int i;
 
+  if (!get_options()->DirReqStatistics)
+    goto done;
+
   data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   format_iso_time(since, geoip_get_history_start());
diff --git a/src/or/or.h b/src/or/or.h
index 7b91ff7..445df4d 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -20,8 +20,8 @@
 #ifndef INSTRUMENT_DOWNLOADS
 #define INSTRUMENT_DOWNLOADS 1
 #endif
-#ifndef ENABLE_GEOIP_STATS
-#define ENABLE_GEOIP_STATS 1
+#ifndef ENABLE_DIRREQ_STATS
+#define ENABLE_DIRREQ_STATS 1
 #endif
 #ifndef ENABLE_BUFFER_STATS
 #define ENABLE_BUFFER_STATS 1
@@ -970,7 +970,7 @@ typedef struct connection_t {
    * to the evdns_server_port is uses to listen to and answer connections. */
   struct evdns_server_port *dns_server_port;
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /** Unique ID for measuring tunneled network status requests. */
   uint64_t dirreq_id;
 #endif
@@ -1960,7 +1960,7 @@ typedef struct circuit_t {
    * linked to an OR connection. */
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /** Unique ID for measuring tunneled network status requests. */
   uint64_t dirreq_id;
 #endif
@@ -2500,6 +2500,10 @@ typedef struct {
    * exit allows it, we use it. */
   int AllowSingleHopCircuits;
 
+  /** If true, the user wants us to collect statistics on clients
+   * requesting network statuses from us as directory. */
+  int DirReqStatistics;
+
   /** If true, the user wants us to collect statistics on port usage. */
   int ExitPortStatistics;
 
@@ -2564,7 +2568,7 @@ typedef struct {
   int BridgeRecordUsageByCountry;
 
 #if 0
-  /** If true, and Tor is built with GEOIP_STATS support, and we're a
+  /** If true, and Tor is built with DIRREQ_STATS support, and we're a
    * directory, record how many directory requests we get from each country. */
   int DirRecordUsageByCountry;
   /** Round all GeoIP results to the next multiple of this value, to avoid
diff --git a/src/or/relay.c b/src/or/relay.c
index 5654736..098b952 100644
--- a/src/or/relay.c
+++ b/src/or/relay.c
@@ -532,7 +532,7 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   /* If we are sending an END cell and this circuit is used for a tunneled
    * directory request, advance its state. */
   if (relay_command == RELAY_COMMAND_END && circ->dirreq_id)
@@ -1040,7 +1040,7 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
                "Begin cell for known stream. Dropping.");
         return 0;
       }
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
       if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
         /* Assign this circuit and its app-ward OR connection a unique ID,
          * so that we can measure download times. The local edge and dir
@@ -1841,7 +1841,7 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max,
       orcirc->processed_cells++;
     }
 #endif
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     /* If we just flushed our queue and this circuit is used for a
      * tunneled directory request, possibly advance its state. */
     if (queue->n == 0 && TO_CONN(conn)->dirreq_id)
diff --git a/src/or/router.c b/src/or/router.c
index bdea4fa..42a0d56 100644
--- a/src/or/router.c
+++ b/src/or/router.c
@@ -1916,7 +1916,7 @@ extrainfo_get_client_geoip_summary(time_t now)
 {
   static time_t last_purged_at = 0;
   int geoip_purge_interval = 48*60*60;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
 #endif
 #ifdef ENABLE_ENTRY_STATS
-- 
1.5.6.5