[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [tor/master 03/10] Do away with the complexity of the network liveness detection.



Author: Mike Perry <mikeperry-git@xxxxxxxxxx>
Date: Wed, 29 Sep 2010 08:55:11 -0700
Subject: Do away with the complexity of the network liveness detection.
Commit: 11910cf5b32edfd6b900386d37bb69c7592174c1

We really should ignore any timeouts that have *no* network activity for their
entire measured lifetime, now that we have the 95th percentile measurement
changes. Usually that measured lifetime is up to a minute, even on fast
connections.
---
 src/or/circuitbuild.c |   71 ++++++++++++++----------------------------------
 src/or/or.h           |   32 +---------------------
 src/test/test.c       |   26 ++++--------------
 3 files changed, 28 insertions(+), 101 deletions(-)

diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 81b63fe..234765b 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -306,6 +306,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
   control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
 }
 
+#if 0
 /**
  * Rewind our build time history by n positions.
  */
@@ -332,6 +333,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
           "Rewound history by %d places. Current index: %d. "
           "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
 }
+#endif
 
 /**
  * Add a new build time value <b>time</b> to the set of build times. Time
@@ -941,8 +943,16 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
 void
 circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 {
-  cbt->liveness.network_last_live = approx_time();
-  cbt->liveness.nonlive_discarded = 0;
+  time_t now = approx_time();
+  if (cbt->liveness.nonlive_timeouts > 0) {
+    log_notice(LD_CIRC,
+               "Tor now sees network activity. Restoring circuit build "
+               "timeout recording. Network was down for %ld seconds "
+               "during %d circuit attempts.",
+               (long int)now - cbt->liveness.network_last_live,
+               cbt->liveness.nonlive_timeouts);
+  }
+  cbt->liveness.network_last_live = now;
   cbt->liveness.nonlive_timeouts = 0;
 }
 
@@ -1002,9 +1012,16 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
                now_buf);
     }
     cbt->liveness.nonlive_timeouts++;
-    log_info(LD_CIRC,
+    if (cbt->liveness.nonlive_timeouts == 1) {
+      log_notice(LD_CIRC,
+                 "Tor has not observed any network activity for the past %ld "
+                 "seconds. Disabling circuit build timeout code.",
+                 (long int)now - cbt->liveness.network_last_live);
+    } else {
+      log_info(LD_CIRC,
              "Got non-live timeout. Current count is: %d",
              cbt->liveness.nonlive_timeouts);
+    }
   }
 }
 
@@ -1018,54 +1035,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
 int
 circuit_build_times_network_check_live(circuit_build_times_t *cbt)
 {
-  time_t now = approx_time();
-  if (cbt->liveness.nonlive_timeouts >= CBT_NETWORK_NONLIVE_DISCARD_COUNT) {
-    if (!cbt->liveness.nonlive_discarded) {
-      cbt->liveness.nonlive_discarded = 1;
-      log_notice(LD_CIRC, "Network is no longer live (too many recent "
-                "circuit timeouts). Dead for %ld seconds.",
-                (long int)(now - cbt->liveness.network_last_live));
-      /* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stopped
-       * counting after that */
-      circuit_build_times_rewind_history(cbt,
-                     CBT_NETWORK_NONLIVE_TIMEOUT_COUNT-1);
-      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_DISCARD);
-    }
-    return 0;
-  } else if (cbt->liveness.nonlive_timeouts >=
-                CBT_NETWORK_NONLIVE_TIMEOUT_COUNT) {
-    if (cbt->liveness.suspended_timeout <= 0) {
-      cbt->liveness.suspended_timeout = cbt->timeout_ms;
-      cbt->liveness.suspended_close_timeout = cbt->close_ms;
-
-      if (cbt->timeout_ms < circuit_build_times_get_initial_timeout())
-        cbt->timeout_ms = circuit_build_times_get_initial_timeout();
-      else
-        cbt->timeout_ms *= 2;
-
-      if (cbt->close_ms < circuit_build_times_get_initial_timeout())
-        cbt->close_ms = circuit_build_times_get_initial_timeout();
-      else
-        cbt->close_ms *= 2;
-
-      log_notice(LD_CIRC,
-                "Network is flaky. No activity for %ld seconds. "
-                "Temporarily raising timeout to %lds.",
-                (long int)(now - cbt->liveness.network_last_live),
-                tor_lround(cbt->timeout_ms/1000));
-      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED);
-    }
-
+  if (cbt->liveness.nonlive_timeouts > 0) {
     return 0;
-  } else if (cbt->liveness.suspended_timeout > 0) {
-    log_notice(LD_CIRC,
-              "Network activity has resumed. "
-              "Resuming circuit timeout calculations.");
-    cbt->timeout_ms = cbt->liveness.suspended_timeout;
-    cbt->close_ms = cbt->liveness.suspended_close_timeout;
-    cbt->liveness.suspended_timeout = 0;
-    cbt->liveness.suspended_close_timeout = 0;
-    control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME);
   }
 
   return 1;
diff --git a/src/or/or.h b/src/or/or.h
index 6c398b7..dc46684 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -2961,26 +2961,6 @@ typedef uint32_t build_time_t;
 /** Save state every 10 circuits */
 #define CBT_SAVE_STATE_EVERY 10
 
-/* Circuit Build Timeout network liveness constants */
-
-/**
- * Have we received a cell in the last N circ attempts?
- *
- * This tells us when to temporarily switch back to
- * BUILD_TIMEOUT_INITIAL_VALUE until we start getting cells,
- * at which point we switch back to computing the timeout from
- * our saved history.
- */
-#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3
-
-/**
- * This tells us when to toss out the last streak of N timeouts.
- *
- * If instead we start getting cells, we switch back to computing the timeout
- * from our saved history.
- */
-#define CBT_NETWORK_NONLIVE_DISCARD_COUNT (CBT_NETWORK_NONLIVE_TIMEOUT_COUNT*2)
-
 /* Circuit build times consensus parameters */
 
 /**
@@ -3021,9 +3001,7 @@ double circuit_build_times_quantile_cutoff(void);
 #define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000)
 int32_t circuit_build_times_initial_timeout(void);
 
-#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1 || \
-    CBT_NETWORK_NONLIVE_DISCARD_COUNT < 1 || \
-    CBT_NETWORK_NONLIVE_TIMEOUT_COUNT < 1
+#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1
 #error "RECENT_CIRCUITS is set too low."
 #endif
 
@@ -3033,8 +3011,6 @@ typedef struct {
   time_t network_last_live;
   /** If the network is not live, how many timeouts has this caused? */
   int nonlive_timeouts;
-  /** If the network is not live, have we yet discarded our history? */
-  int nonlive_discarded;
   /** Circular array of circuits that have made it to the first hop. Slot is
    * 1 if circuit timed out, 0 if circuit succeeded */
   int8_t *timeouts_after_firsthop;
@@ -3042,12 +3018,6 @@ typedef struct {
   int num_recent_circs;
   /** Index into circular array. */
   int after_firsthop_idx;
-  /** Timeout gathering is suspended if non-zero. The old timeout value
-    * is stored here in that case. */
-  double suspended_timeout;
-  /** Timeout gathering is suspended if non-zero. The old close value
-    * is stored here in that case. */
-  double suspended_close_timeout;
 } network_liveness_t;
 
 /** Structure for circuit build times history */
diff --git a/src/test/test.c b/src/test/test.c
index d952832..8d8c46f 100644
--- a/src/test/test.c
+++ b/src/test/test.c
@@ -499,28 +499,14 @@ test_circuit_timeout(void)
 
     build_times_idx = estimate.build_times_idx;
     total_build_times = estimate.total_build_times;
-    for (i = 0; i < CBT_NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {
-      test_assert(circuit_build_times_network_check_live(&estimate));
-      test_assert(circuit_build_times_network_check_live(&final));
-
-      circuit_build_times_count_close(&estimate, 0,
-                 (time_t)(approx_time()-estimate.close_ms/1000.0-1));
-      circuit_build_times_count_close(&final, 0,
-                 (time_t)(approx_time()-final.close_ms/1000.0-1));
-    }
-
-    test_assert(!circuit_build_times_network_check_live(&estimate));
-    test_assert(!circuit_build_times_network_check_live(&final));
 
-    for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
-      circuit_build_times_count_close(&estimate, 0,
-                (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+    test_assert(circuit_build_times_network_check_live(&estimate));
+    test_assert(circuit_build_times_network_check_live(&final));
 
-      if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
-        circuit_build_times_count_close(&final, 0,
-                (time_t)(approx_time()-final.close_ms/1000.0-1));
-      }
-    }
+    circuit_build_times_count_close(&estimate, 0,
+            (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+    circuit_build_times_count_close(&final, 0,
+            (time_t)(approx_time()-final.close_ms/1000.0-1));
 
     test_assert(!circuit_build_times_network_check_live(&estimate));
     test_assert(!circuit_build_times_network_check_live(&final));
-- 
1.7.1