[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [tor/master 2/2] More gracefully handle corrupt state files.
Author: Mike Perry <mikeperry-git@xxxxxxxxxx>
Date: Tue, 6 Jul 2010 12:08:13 -0700
Subject: More gracefully handle corrupt state files.
Commit: a9edb0b4f67825a11647c8faefa613d704be44ae
Save a backup if we get odd circuitbuildtimes and other state info.
In the case of circuit build times, we no longer assert, and reset our state.
---
changes/cbt-bugfixes | 2 +
src/or/circuitbuild.c | 27 ++++++++++++++++---
src/or/config.c | 71 ++++++++++++++++++++++++++++++------------------
3 files changed, 69 insertions(+), 31 deletions(-)
diff --git a/changes/cbt-bugfixes b/changes/cbt-bugfixes
index 31241c9..12e62a8 100644
--- a/changes/cbt-bugfixes
+++ b/changes/cbt-bugfixes
@@ -29,5 +29,7 @@
parameter and via a LearnCircuitBuildTimeout config option. Also
automatically disable circuit build time calculation if we are either
a AuthoritativeDirectory, or if we fail to write our state file. Bug 1296.
+ - More gracefully handle corrupt state files, removing asserts in favor
+ of saving a backup and resetting state.
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 07a66cf..4090054 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -193,12 +193,11 @@ circuit_build_times_new_consensus_params(circuit_build_times_t *cbt,
int32_t num = networkstatus_get_param(ns, "cbtrecentcount",
CBT_DEFAULT_RECENT_CIRCUITS);
- if (num != cbt->liveness.num_recent_circs) {
+ if (num > 0 && num != cbt->liveness.num_recent_circs) {
int8_t *recent_circs;
log_notice(LD_CIRC, "Changing recent timeout size from %d to %d",
cbt->liveness.num_recent_circs, num);
- tor_assert(num > 0);
tor_assert(cbt->liveness.timeouts_after_firsthop);
/*
@@ -675,6 +674,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
"Corrupt state file? Build times count mismatch. "
"Read %d times, but file says %d", loaded_cnt,
state->TotalBuildTimes);
+ *msg = tor_strdup("Build times count mismatch.");
+ circuit_build_times_reset(cbt);
+ tor_free(loaded_times);
+ return -1;
}
circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt);
@@ -688,8 +691,18 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt,
log_info(LD_CIRC,
"Loaded %d/%d values from %d lines in circuit time histogram",
tot_values, cbt->total_build_times, N);
- tor_assert(cbt->total_build_times == tot_values);
- tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE);
+
+ if (cbt->total_build_times != tot_values
+ || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) {
+ log_warn(LD_CIRC,
+ "Corrupt state file? Shuffled build times mismatch. "
+ "Read %d times, but file says %d", tot_values,
+ state->TotalBuildTimes);
+ *msg = tor_strdup("Build times count mismatch.");
+ circuit_build_times_reset(cbt);
+ tor_free(loaded_times);
+ return -1;
+ }
circuit_build_times_set_timeout(cbt);
@@ -742,6 +755,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
n++;
}
+ /*
+ * We are erring and asserting here because this can only happen
+ * in codepaths other than startup. The startup state parsing code
+ * performs this same check, and resets state if it hits it. If we
+ * hit it at runtime, something serious has gone wrong.
+ */
if (n!=cbt->total_build_times) {
log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n,
cbt->total_build_times);
diff --git a/src/or/config.c b/src/or/config.c
index 771fa03..7dee8ff 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -4837,25 +4837,63 @@ or_state_validate(or_state_t *old_state, or_state_t *state,
}
/** Replace the current persistent state with <b>new_state</b> */
-static void
+static int
or_state_set(or_state_t *new_state)
{
char *err = NULL;
+ int ret = 0;
tor_assert(new_state);
config_free(&state_format, global_state);
global_state = new_state;
if (entry_guards_parse_state(global_state, 1, &err)<0) {
log_warn(LD_GENERAL,"%s",err);
tor_free(err);
+ ret = -1;
}
if (rep_hist_load_state(global_state, &err)<0) {
log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err);
tor_free(err);
+ ret = -1;
}
if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) {
log_warn(LD_GENERAL,"%s",err);
tor_free(err);
+ ret = -1;
}
+ return ret;
+}
+
+/**
+ * Save a broken state file to a backup location.
+ */
+static void
+or_state_save_broken(char *fname)
+{
+ int i;
+ file_status_t status;
+ size_t len = strlen(fname)+16;
+ char *fname2 = tor_malloc(len);
+ for (i = 0; i < 100; ++i) {
+ tor_snprintf(fname2, len, "%s.%d", fname, i);
+ status = file_status(fname2);
+ if (status == FN_NOENT)
+ break;
+ }
+ if (i == 100) {
+ log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
+ "state files to move aside. Discarding the old state file.",
+ fname);
+ unlink(fname);
+ } else {
+ log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
+ "to \"%s\". This could be a bug in Tor; please tell "
+ "the developers.", fname, fname2);
+ if (rename(fname, fname2) < 0) {
+ log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
+ "OS gave an error of %s", strerror(errno));
+ }
+ }
+ tor_free(fname2);
}
/** Reload the persistent state from disk, generating a new state as needed.
@@ -4917,31 +4955,8 @@ or_state_load(void)
" This is a bug in Tor.");
goto done;
} else if (badstate && contents) {
- int i;
- file_status_t status;
- size_t len = strlen(fname)+16;
- char *fname2 = tor_malloc(len);
- for (i = 0; i < 100; ++i) {
- tor_snprintf(fname2, len, "%s.%d", fname, i);
- status = file_status(fname2);
- if (status == FN_NOENT)
- break;
- }
- if (i == 100) {
- log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad "
- "state files to move aside. Discarding the old state file.",
- fname);
- unlink(fname);
- } else {
- log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside "
- "to \"%s\". This could be a bug in Tor; please tell "
- "the developers.", fname, fname2);
- if (rename(fname, fname2) < 0) {
- log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The "
- "OS gave an error of %s", strerror(errno));
- }
- }
- tor_free(fname2);
+ or_state_save_broken(fname);
+
tor_free(contents);
config_free(&state_format, new_state);
@@ -4953,7 +4968,9 @@ or_state_load(void)
} else {
log_info(LD_GENERAL, "Initialized state");
}
- or_state_set(new_state);
+ if (or_state_set(new_state) == -1) {
+ or_state_save_broken(fname);
+ }
new_state = NULL;
if (!contents) {
global_state->next_write = 0;
--
1.7.1