[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [tor/maint-0.2.1] the third piece of bug 969 fixing



Author: Roger Dingledine <arma@xxxxxxxxxxxxxx>
Date: Sat, 20 Jun 2009 05:25:14 -0400
Subject: the third piece of bug 969 fixing
Commit: e7bc189f7c8fb4c2a490f10bd26d81893626ade1

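When we write out our stability info, detect relays that have slipped
through the cracks: relays that rephist still considers up but that are
no longer in our routerlist. Log about them and correct the problem by
marking them unreachable.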

If we continue to see a lot of these over time, it means there's another
spot where relays fall out of the routerlist without being marked as
unreachable.
---
 src/or/main.c    |    9 +++++----
 src/or/or.h      |    2 +-
 src/or/rephist.c |   20 ++++++++++++++++++--
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/or/main.c b/src/or/main.c
index 8fc712b..60c42aa 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -903,7 +903,7 @@ run_scheduled_events(time_t now)
     time_to_downrate_stability = rep_hist_downrate_old_runs(now);
   if (authdir_mode_tests_reachability(options)) {
     if (time_to_save_stability < now) {
-      if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
+      if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
         log_warn(LD_GENERAL, "Couldn't store mtbf data.");
       }
 #define SAVE_STABILITY_INTERVAL (30*60)
@@ -1955,14 +1955,15 @@ tor_cleanup(void)
   /* Remove our pid file. We don't care if there was an error when we
    * unlink, nothing we could do about it anyways. */
   if (options->command == CMD_RUN_TOR) {
+    time_t now = time(NULL);
     if (options->PidFile)
       unlink(options->PidFile);
     if (accounting_is_enabled(options))
-      accounting_record_bandwidth_usage(time(NULL), get_or_state());
+      accounting_record_bandwidth_usage(now, get_or_state());
     or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
-    or_state_save(time(NULL));
+    or_state_save(now);
     if (authdir_mode_tests_reachability(options))
-      rep_hist_record_mtbf_data();
+      rep_hist_record_mtbf_data(now, 0);
   }
 #ifdef USE_DMALLOC
   dmalloc_log_stats();
diff --git a/src/or/or.h b/src/or/or.h
index f37b417..eddeda1 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3970,7 +3970,7 @@ void rep_history_clean(time_t before);
 
 void rep_hist_note_router_reachable(const char *id, time_t when);
 void rep_hist_note_router_unreachable(const char *id, time_t when);
-int rep_hist_record_mtbf_data(void);
+int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
 int rep_hist_load_mtbf_data(time_t now);
 
 time_t rep_hist_downrate_old_runs(time_t now);
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 11e040c..13fdb58 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -683,9 +683,13 @@ rep_history_clean(time_t before)
   }
 }
 
-/** Write MTBF data to disk.  Returns 0 on success, negative on failure. */
+/** Write MTBF data to disk. Return 0 on success, negative on failure.
+ *
+ * If <b>missing_means_down</b>, then if we're about to write an entry
+ * that is still considered up but isn't in our routerlist, consider it
+ * to be down. */
 int
-rep_hist_record_mtbf_data(void)
+rep_hist_record_mtbf_data(time_t now, int missing_means_down)
 {
   char time_buf[ISO_TIME_LEN+1];
 
@@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
     hist = (or_history_t*) or_history_p;
 
     base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
+
+    if (missing_means_down && hist->start_of_run &&
+        !router_get_by_digest(digest)) {
+      /* We think this relay is running, but it's not listed in our
+       * routerlist. Somehow it fell out without telling us it went
+       * down. Complain and also correct it. */
+      log_info(LD_HIST,
+               "Relay '%s' is listed as up in rephist, but it's not in "
+               "our routerlist. Correcting.", dbuf);
+      rep_hist_note_router_unreachable(digest, now);
+    }
+
     PRINTF((f, "R %s\n", dbuf));
     if (hist->start_of_run > 0) {
       format_iso_time(time_buf, hist->start_of_run);
-- 
1.5.6.5