[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [tor/master] If configured, write per-port exit statistics to disk periodically.



Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed, 24 Jun 2009 19:51:45 +0200
Subject: If configured, write per-port exit statistics to disk periodically.
Commit: 4d6af73db88e409764f43fc6cdaa432d667becf3

[Original patch series from Karsten, revised and squashed by Nick]
---
 ChangeLog                |    4 +
 configure.in             |    7 ++
 src/or/config.c          |   11 +++
 src/or/connection.c      |   10 ++-
 src/or/connection_edge.c |    2 +
 src/or/or.h              |   14 ++++
 src/or/rephist.c         |  170 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 216 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b06d099..0d5af88 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -17,6 +17,10 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
       help debug some conceivable causes of bug 930.
     - Directories that are configured with the --enable-geoip-stats flag
       now write their GeoIP stats to disk exactly every 24 hours.
+    - Exit nodes can write statistics on the number of exit streams and
+      transferred bytes per port to disk every 24 hours.  To enable this,
+      run configure with the --enable-exit-stats option, and set
+      "ExitPortStatistics 1" in your torrc.
 
   o Minor bugfixes
     - Hidden service clients didn't use a cached service descriptor that
diff --git a/configure.in b/configure.in
index f1ac836..8442115 100644
--- a/configure.in
+++ b/configure.in
@@ -85,6 +85,13 @@ case $host in
      ;;
 esac
 
+AC_ARG_ENABLE(exit-stats,
+     AS_HELP_STRING(--enable-exit-stats, enable code for exits to collect per-port statistics))
+# Define ENABLE_EXIT_STATS only when --enable-exit-stats was requested.
+if test "$enable_exit_stats" = "yes"; then
+  AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
+fi
+
 AC_ARG_ENABLE(geoip-stats,
      AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
 
diff --git a/src/or/config.c b/src/or/config.c
index 6ad1c3a..f1fea13 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -205,6 +205,7 @@ static config_var_t _option_vars[] = {
   V(ExitNodes,                   ROUTERSET, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
+  V(ExitPortStatistics,          BOOL,     "0"),
   V(FallbackNetworkstatusFile,   FILENAME,
     SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "fallback-consensus"),
   V(FascistFirewall,             BOOL,     "0"),
@@ -1384,6 +1385,16 @@ options_act(or_options_t *old_options)
     "write aggregate statistics to disk. Check the geoip-stats file "
     "in your data directory once I've been running for 24 hours.");
 #endif
+#ifdef ENABLE_EXIT_STATS
+  if (options->ExitPortStatistics)
+    log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
+               "Look for the exit-stats file that will first be written to "
+               "the data directory in 24 hours from now.");
+#else
+  if (options->ExitPortStatistics)
+    log_warn(LD_CONFIG, "ExitPortStatistics enabled, but Tor was built "
+             "without port statistics support.");
+#endif
   /* Check if we need to parse and add the EntryNodes config option. */
   if (options->EntryNodes &&
       (!old_options ||
diff --git a/src/or/connection.c b/src/or/connection.c
index c8406fe..0897eb4 100644
--- a/src/or/connection.c
+++ b/src/or/connection.c
@@ -1702,10 +1702,16 @@ connection_buckets_decrement(connection_t *conn, time_t now,
     tor_fragile_assert();
   }
 
-  if (num_read > 0)
+  if (num_read > 0) {
+    if (conn->type == CONN_TYPE_EXIT)
+      rep_hist_note_exit_bytes_read(conn->port, num_read, now);
     rep_hist_note_bytes_read(num_read, now);
-  if (num_written > 0)
+  }
+  if (num_written > 0) {
+    if (conn->type == CONN_TYPE_EXIT)
+      rep_hist_note_exit_bytes_written(conn->port, num_written, now);
     rep_hist_note_bytes_written(num_written, now);
+  }
 
   if (connection_counts_as_relayed_traffic(conn, now)) {
     global_relayed_read_bucket -= (int)num_read;
diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c
index 3cd00e2..dd44190 100644
--- a/src/or/connection_edge.c
+++ b/src/or/connection_edge.c
@@ -333,6 +333,8 @@ connection_edge_finished_connecting(edge_connection_t *edge_conn)
            escaped_safe_str(conn->address),conn->port,
            safe_str(fmt_addr(&conn->addr)));
 
+  rep_hist_note_exit_stream_opened(conn->port, approx_time());
+
   conn->state = EXIT_CONN_STATE_OPEN;
   connection_watch_events(conn, READ_EVENT); /* stop writing, keep reading */
   if (connection_wants_to_flush(conn)) /* in case there are any queued relay
diff --git a/src/or/or.h b/src/or/or.h
index 091264a..935ea52 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -2475,6 +2475,9 @@ typedef struct {
    * exit allows it, we use it. */
   int AllowSingleHopCircuits;
 
+  /** If true, the user wants us to collect statistics on port usage. */
+  int ExitPortStatistics;
+
   /** If true, do not believe anybody who tells us that a domain resolves
    * to an internal address, or that an internal address has a PTR mapping.
    * Helps avoid some cross-site attacks. */
@@ -3961,6 +3964,17 @@ void rep_hist_note_extend_failed(const char *from_name, const char *to_name);
 void rep_hist_dump_stats(time_t now, int severity);
 void rep_hist_note_bytes_read(size_t num_bytes, time_t when);
 void rep_hist_note_bytes_written(size_t num_bytes, time_t when);
+#ifdef ENABLE_EXIT_STATS
+void rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes,
+                                   time_t now);
+void rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes,
+                                      time_t now);
+void rep_hist_note_exit_stream_opened(uint16_t port, time_t now);
+#else /* !ENABLE_EXIT_STATS: calls expand to STMT_NIL, args unused */
+#define rep_hist_note_exit_bytes_read(p,n,t) STMT_NIL
+#define rep_hist_note_exit_bytes_written(p,n,t) STMT_NIL
+#define rep_hist_note_exit_stream_opened(p,t) STMT_NIL
+#endif /* ENABLE_EXIT_STATS */
 int rep_hist_bandwidth_assess(void);
 char *rep_hist_get_bandwidth_lines(int for_extrainfo);
 void rep_hist_update_state(or_state_t *state);
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 13fdb58..f1f502c 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -1320,6 +1320,176 @@ rep_hist_note_bytes_read(size_t num_bytes, time_t when)
   add_obs(read_array, when, num_bytes);
 }
 
+#ifdef ENABLE_EXIT_STATS
+/* Constants controlling the per-port exit statistics. */
+/** Length in seconds of one exit stats measurement period (24 hours). */
+#define EXIT_STATS_INTERVAL_SEC (24 * 60 * 60)
+/** Byte counts are rounded up to a multiple of this when written. */
+#define EXIT_STATS_ROUND_UP_BYTES 1024
+/** Stream counts are rounded up to a multiple of this when written. */
+#define EXIT_STATS_ROUND_UP_STREAMS 4
+/** Number of TCP ports; also the length of each per-port array below. */
+#define EXIT_STATS_NUM_PORTS 65536
+
+/* The following data structures are arrays and no fancy smartlists or maps,
+ * so that all write operations can be done in constant time. This comes at
+ * the price of some memory (1.25 MB: two uint64_t arrays and one uint32_t
+ * array, each indexed by port) and linear complexity when writing stats. */
+/** Number of bytes read in current period, indexed by exit port */
+static uint64_t exit_bytes_read[EXIT_STATS_NUM_PORTS];
+/** Number of bytes written in current period, indexed by exit port */
+static uint64_t exit_bytes_written[EXIT_STATS_NUM_PORTS];
+/** Number of streams opened in current period, indexed by exit port */
+static uint32_t exit_streams[EXIT_STATS_NUM_PORTS];
+
+/** End of the current exit stats period; 0 until the first observation. */
+static time_t end_of_current_exit_stats_period = 0;
+
+/** Append exit stats for each period that ended before <b>when</b> to the
+ * "exit-stats" file in the data directory, resetting the counters and
+ * advancing the period end each time. Aborts the write on any I/O error. */
+static void
+write_exit_stats(time_t when)
+{
+  char t[ISO_TIME_LEN+1];
+  int r, i, comma;
+  uint64_t *b;
+  char *filename = get_datadir_fname("exit-stats");
+  open_file_t *open_file = NULL;
+  FILE *out = NULL;
+
+  log_debug(LD_HIST, "Considering writing exit port statistics to disk..");
+  while (when > end_of_current_exit_stats_period) {
+    format_iso_time(t, end_of_current_exit_stats_period);
+    log_info(LD_HIST, "Writing exit port statistics to disk for period "
+             "ending at %s.", t);
+
+    if (!open_file) {
+      out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
+                                        0600, &open_file);
+      if (!out) {
+        log_warn(LD_HIST, "Couldn't open '%s'.", filename);
+        goto done;
+      }
+    }
+
+    /* written yyyy-mm-dd HH:MM:SS (n s) */
+    if (fprintf(out, "written %s (%d s)\n", t, EXIT_STATS_INTERVAL_SEC) < 0)
+      goto done;
+
+    /* kibibytes-(read|written) port=kibibytes,.. */
+    for (r = 0; r < 2; r++) {
+      /* Pass 0 prints the read counters, pass 1 the written counters. */
+      b = r == 0 ? exit_bytes_read : exit_bytes_written;
+      if (fprintf(out, "%s ",
+                  r == 0 ? "kibibytes-read" : "kibibytes-written")<0)
+        goto done;
+      comma = 0;
+      for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
+        if (b[i] > 0) {
+          uint64_t num = b[i];
+          num += EXIT_STATS_ROUND_UP_BYTES - 1;
+          num /= EXIT_STATS_ROUND_UP_BYTES;
+          num *= EXIT_STATS_ROUND_UP_BYTES;
+          num /= 1024;
+          if (fprintf(out, "%s%d="U64_FORMAT,
+                      comma++ ? "," : "", i,
+                      U64_PRINTF_ARG(num)) < 0)
+            goto done;
+        }
+      }
+      if (fprintf(out, "\n")<0)
+        goto done;
+      /* Reset counters */
+      memset(b, 0, EXIT_STATS_NUM_PORTS*sizeof(uint64_t));
+    }
+    /* streams-opened port=num,.. */
+    if (fprintf(out, "streams-opened ")<0)
+      goto done;
+    comma = 0;
+    for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
+      if (exit_streams[i] > 0) {
+        uint32_t num = exit_streams[i];
+        num += EXIT_STATS_ROUND_UP_STREAMS - 1;
+        num /= EXIT_STATS_ROUND_UP_STREAMS;
+        num *= EXIT_STATS_ROUND_UP_STREAMS;
+        if (fprintf(out, "%s%d=%u",
+                    comma++ ? "," : "", i, (unsigned)num)<0)
+          goto done;
+      }
+    }
+    if (fprintf(out, "\n")<0)
+      goto done;
+    /* Reset counters */
+    memset(exit_streams, 0, sizeof(exit_streams));
+    end_of_current_exit_stats_period += EXIT_STATS_INTERVAL_SEC;
+  }
+
+  if (open_file)
+    finish_writing_to_file(open_file);
+  open_file = NULL;
+ done:
+  if (open_file)
+    abort_writing_to_file(open_file);
+  tor_free(filename);
+}
+
+/** Called before recording an exit stats observation made at second
+ * <b>when</b>. On the first observation, start a measurement period that
+ * ends EXIT_STATS_INTERVAL_SEC later; once <b>when</b> lies past the end
+ * of the current period, hand over to write_exit_stats(). */
+static void
+add_exit_obs(time_t when)
+{
+  if (when <= end_of_current_exit_stats_period)
+    return; /* Still inside the current period; nothing to do. */
+  if (!end_of_current_exit_stats_period)
+    end_of_current_exit_stats_period = when + EXIT_STATS_INTERVAL_SEC;
+  else
+    write_exit_stats(when);
+}
+
+/** Record that <b>num_bytes</b> bytes were written to an exit connection
+ * to <b>port</b> at time <b>when</b>; no-op unless ExitPortStatistics. */
+void
+rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes,
+                                 time_t when)
+{
+  if (get_options()->ExitPortStatistics) {
+    add_exit_obs(when); /* may flush and reset the counters first */
+    exit_bytes_written[port] += num_bytes;
+    log_debug(LD_HIST, "Written %lu bytes to exit connection to port %d.",
+              (unsigned long)num_bytes, port);
+  }
+}
+
+/** Record that <b>num_bytes</b> bytes were read from an exit connection
+ * to <b>port</b> at time <b>when</b>; no-op unless ExitPortStatistics. */
+void
+rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes,
+                              time_t when)
+{
+  if (get_options()->ExitPortStatistics) {
+    add_exit_obs(when); /* may flush and reset the counters first */
+    exit_bytes_read[port] += num_bytes;
+    log_debug(LD_HIST, "Read %lu bytes from exit connection to port %d.",
+              (unsigned long)num_bytes, port);
+  }
+}
+
+/** Record that an exit stream to <b>port</b> was opened at time
+ * <b>when</b>; does nothing unless ExitPortStatistics is set. */
+void
+rep_hist_note_exit_stream_opened(uint16_t port, time_t when)
+{
+  if (get_options()->ExitPortStatistics) {
+    add_exit_obs(when); /* may flush and reset the counters first */
+    exit_streams[port]++;
+    log_debug(LD_HIST, "Opened exit stream to port %d", port);
+  }
+}
+#endif
+
 /** Helper: Return the largest value in b->maxima.  (This is equal to the
  * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last
  * NUM_SECS_BW_SUM_IS_VALID seconds.)
-- 
1.5.6.5