[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [tor/maint-0.2.1] the third piece of bug 969 fixing
Author: Roger Dingledine <arma@xxxxxxxxxxxxxx>
Date: Sat, 20 Jun 2009 05:25:14 -0400
Subject: the third piece of bug 969 fixing
Commit: e7bc189f7c8fb4c2a490f10bd26d81893626ade1
When we write out our stability info, detect relays that have slipped
through the cracks. Log about them and correct the problem.
If we continue to see a lot of these over time, it means there's another
spot where relays fall out of the routerlist without being marked as
unreachable.
---
src/or/main.c | 9 +++++----
src/or/or.h | 2 +-
src/or/rephist.c | 20 ++++++++++++++++++--
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/src/or/main.c b/src/or/main.c
index 8fc712b..60c42aa 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -903,7 +903,7 @@ run_scheduled_events(time_t now)
time_to_downrate_stability = rep_hist_downrate_old_runs(now);
if (authdir_mode_tests_reachability(options)) {
if (time_to_save_stability < now) {
- if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
+ if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
log_warn(LD_GENERAL, "Couldn't store mtbf data.");
}
#define SAVE_STABILITY_INTERVAL (30*60)
@@ -1955,14 +1955,15 @@ tor_cleanup(void)
/* Remove our pid file. We don't care if there was an error when we
* unlink, nothing we could do about it anyways. */
if (options->command == CMD_RUN_TOR) {
+ time_t now = time(NULL);
if (options->PidFile)
unlink(options->PidFile);
if (accounting_is_enabled(options))
- accounting_record_bandwidth_usage(time(NULL), get_or_state());
+ accounting_record_bandwidth_usage(now, get_or_state());
or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
- or_state_save(time(NULL));
+ or_state_save(now);
if (authdir_mode_tests_reachability(options))
- rep_hist_record_mtbf_data();
+ rep_hist_record_mtbf_data(now, 0);
}
#ifdef USE_DMALLOC
dmalloc_log_stats();
diff --git a/src/or/or.h b/src/or/or.h
index f37b417..eddeda1 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3970,7 +3970,7 @@ void rep_history_clean(time_t before);
void rep_hist_note_router_reachable(const char *id, time_t when);
void rep_hist_note_router_unreachable(const char *id, time_t when);
-int rep_hist_record_mtbf_data(void);
+int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 11e040c..13fdb58 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -683,9 +683,13 @@ rep_history_clean(time_t before)
}
}
-/** Write MTBF data to disk. Returns 0 on success, negative on failure. */
+/** Write MTBF data to disk. Return 0 on success, negative on failure.
+ *
+ * If <b>missing_means_down</b>, then if we're about to write an entry
+ * that is still considered up but isn't in our routerlist, consider it
+ * to be down. */
int
-rep_hist_record_mtbf_data(void)
+rep_hist_record_mtbf_data(time_t now, int missing_means_down)
{
char time_buf[ISO_TIME_LEN+1];
@@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
hist = (or_history_t*) or_history_p;
base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
+
+ if (missing_means_down && hist->start_of_run &&
+ !router_get_by_digest(digest)) {
+ /* We think this relay is running, but it's not listed in our
+ * routerlist. Somehow it fell out without telling us it went
+ * down. Complain and also correct it. */
+ log_info(LD_HIST,
+ "Relay '%s' is listed as up in rephist, but it's not in "
+ "our routerlist. Correcting.", dbuf);
+ rep_hist_note_router_unreachable(digest, now);
+ }
+
PRINTF((f, "R %s\n", dbuf));
if (hist->start_of_run > 0) {
format_iso_time(time_buf, hist->start_of_run);
--
1.5.6.5