[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r15091: Start noticing and reporting bootstrapping failures too. It (in tor/trunk: doc/spec/proposals/ideas src/or)
Author: arma
Date: 2008-06-09 14:32:43 -0400 (Mon, 09 Jun 2008)
New Revision: 15091
Modified:
tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt
tor/trunk/src/or/connection.c
tor/trunk/src/or/connection_or.c
tor/trunk/src/or/control.c
tor/trunk/src/or/directory.c
tor/trunk/src/or/or.h
Log:
Start noticing and reporting bootstrapping failures too. It looks like
we never bothered learning why OR conns fail, so next step is to add some
infrastructure for that.
Modified: tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt
===================================================================
--- tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/doc/spec/proposals/ideas/xxx-bootstrap-phases.txt 2008-06-09 18:32:43 UTC (rev 15091)
@@ -165,7 +165,7 @@
tag=circuit_create "Establishing circuits"
Once we've finished our TLS handshake with an entry guard, we will
- set about rying to make some 3-hop circuits in case we need them soon.
+ set about trying to make some 3-hop circuits in case we need them soon.
Phase 100:
tag=done summary="Done"
Modified: tor/trunk/src/or/connection.c
===================================================================
--- tor/trunk/src/or/connection.c 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/connection.c 2008-06-09 18:32:43 UTC (rev 15091)
@@ -497,9 +497,18 @@
if (connection_or_nonopen_was_started_here(or_conn)) {
rep_hist_note_connect_failed(or_conn->identity_digest, now);
entry_guard_register_connect_status(or_conn->identity_digest,0,now);
- router_set_status(or_conn->identity_digest, 0);
- control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
- control_tls_error_to_reason(or_conn->tls_error));
+ if (!get_options()->HttpsProxy)
+ router_set_status(or_conn->identity_digest, 0);
+ if (conn->state == OR_CONN_STATE_CONNECTING) {
+ control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED, 0);
+ control_event_bootstrap_problem(
+ tor_socket_strerror(or_conn->socket_error), 0);
+ } else {
+ int reason = control_tls_error_to_reason(or_conn->tls_error);
+ control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
+ reason);
+ control_event_bootstrap_problem("foo", reason);
+ }
}
/* Inform any pending (not attached) circs that they should
* give up. */
@@ -1842,13 +1851,15 @@
before = buf_datalen(conn->inbuf);
if (connection_read_to_buf(conn, &max_to_read) < 0) {
/* There's a read error; kill the connection.*/
- connection_close_immediate(conn); /* Don't flush; connection is dead. */
+ if (conn->type == CONN_TYPE_OR)
+ TO_OR_CONN(conn)->socket_error = tor_socket_errno(conn->s);
if (CONN_IS_EDGE(conn)) {
edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
connection_edge_end_errno(edge_conn);
if (edge_conn->socks_request) /* broken, don't send a socks reply back */
edge_conn->socks_request->has_finished = 1;
}
+ connection_close_immediate(conn); /* Don't flush; connection is dead. */
connection_mark_for_close(conn);
return -1;
}
@@ -2145,14 +2156,11 @@
log_info(LD_NET,"in-progress connect failed. Removing.");
if (CONN_IS_EDGE(conn))
connection_edge_end_errno(TO_EDGE_CONN(conn));
+ if (conn->type == CONN_TYPE_OR)
+ TO_OR_CONN(conn)->socket_error = e;
connection_close_immediate(conn);
connection_mark_for_close(conn);
- /* it's safe to pass OPs to router_set_status(), since it just
- * ignores unrecognized routers
- */
- if (conn->type == CONN_TYPE_OR && !get_options()->HttpsProxy)
- router_set_status(TO_OR_CONN(conn)->identity_digest, 0);
return -1;
} else {
return 0; /* no change, see if next time is better */
Modified: tor/trunk/src/or/connection_or.c
===================================================================
--- tor/trunk/src/or/connection_or.c 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/connection_or.c 2008-06-09 18:32:43 UTC (rev 15091)
@@ -546,6 +546,8 @@
}
control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
END_OR_CONN_REASON_TCP_REFUSED);
+ /* XXX connection_connect() can fail for all sorts of other reasons */
+ control_event_bootstrap_problem("foo", END_OR_CONN_REASON_TCP_REFUSED);
connection_free(TO_CONN(conn));
return NULL;
case 0:
@@ -795,6 +797,7 @@
router_set_status(conn->identity_digest, 0);
control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
END_OR_CONN_REASON_OR_IDENTITY);
+ control_event_bootstrap_problem("foo", END_OR_CONN_REASON_OR_IDENTITY);
as_advertised = 0;
}
if (authdir_mode_tests_reachability(options)) {
Modified: tor/trunk/src/or/control.c
===================================================================
--- tor/trunk/src/or/control.c 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/control.c 2008-06-09 18:32:43 UTC (rev 15091)
@@ -3825,7 +3825,7 @@
/** Convert the name of a bootstrapping phase <b>s</b> into strings
* <b>tag</b> and <b>summary</b> suitable for display by the controller. */
-static void
+static int
bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
const char **summary)
{
@@ -3887,48 +3887,94 @@
*summary = "Done";
break;
default:
- log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
+// log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
*tag = *summary = "unknown";
+ return -1;
}
+ return 0;
}
+/** What percentage through the bootstrap process are we? We remember
+ * this so we can avoid sending redundant bootstrap status events, and
+ * so we can guess context for the bootstrap messages which are
+ * ambiguous. */
+static int bootstrap_percent = 0;
+
+/** How many problems have we had getting to the next bootstrapping phase?
+ * These include failure to establish a connection to a Tor relay,
+ * failures to finish the TLS handshake, failures to validate the
+ * consensus document, etc. */
+static int bootstrap_problems = 0;
+
+/* We only tell the controller once we've hit a threshold of problems
+ * for the current phase. */
+#define BOOTSTRAP_PROBLEM_THRESHOLD 10
+
/** Called when Tor has made progress at bootstrapping its directory
- * information and initial circuits. <b>status</b> is the new status,
- * that is, what task we will be doing next. <b>percent</b> is zero if
- * we just started this task, else it represents progress on the task.
- */
-int
-control_event_bootstrap(bootstrap_status_t status, int percent)
+ * information and initial circuits.
+ *
+ * <b>status</b> is the new status, that is, what task we will be doing
+ * next. <b>percent</b> is zero if we just started this task, else it
+ * represents progress on the task. */
+void
+control_event_bootstrap(bootstrap_status_t status, int progress)
{
- static int last_percent = 0;
const char *tag, *summary;
- if (last_percent == 100)
- return 0; /* already bootstrapped; nothing to be done here. */
+ if (bootstrap_percent == 100)
+ return; /* already bootstrapped; nothing to be done here. */
/* special case for handshaking status, since our TLS handshaking code
* can't distinguish what the connection is going to be for. */
if (status == BOOTSTRAP_STATUS_HANDSHAKE) {
- if (last_percent < BOOTSTRAP_STATUS_CONN_OR) {
+ if (bootstrap_percent < BOOTSTRAP_STATUS_CONN_OR) {
status = BOOTSTRAP_STATUS_HANDSHAKE_DIR;
} else {
status = BOOTSTRAP_STATUS_HANDSHAKE_OR;
}
}
- if (status > last_percent || (percent && percent > last_percent)) {
+ if (status > bootstrap_percent ||
+ (progress && progress > bootstrap_percent)) {
bootstrap_status_to_string(status, &tag, &summary);
log_notice(LD_CONTROL, "Bootstrapped %d%%: %s.",
- percent ? percent : status, summary);
+ progress ? progress : status, summary);
control_event_client_status(LOG_NOTICE,
"BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\"",
- percent ? percent : status, tag, summary);
+ progress ? progress : status, tag, summary);
+ if (status > bootstrap_percent) {
+ bootstrap_percent = status; /* new milestone reached */
+ }
+ if (progress > bootstrap_percent) {
+ /* incremental progress within a milestone */
+ bootstrap_percent = progress;
+ }
+ bootstrap_problems = 0; /* Progress! Reset our problem counter. */
}
+}
- if (status > last_percent) /* new milestone reached */
- last_percent = status ;
- if (percent > last_percent) /* incremental progress within a milestone */
- last_percent = percent;
- return 0;
+/* Called when Tor has failed to make bootstrapping progress in a way
+ * that indicates a problem. <b>warn</b> gives a hint as to why, and
+ * <b>reason</b> provides an "or_conn_end_reason" tag).
+ */
+void
+control_event_bootstrap_problem(const char *warn, int reason)
+{
+ int status = bootstrap_percent;
+ const char *tag, *summary;
+
+// if (++bootstrap_problems != BOOTSTRAP_PROBLEM_THRESHOLD)
+// return; /* no worries yet */
+
+ while (bootstrap_status_to_string(status, &tag, &summary) < 0)
+ status--; /* find a recognized status string based on current progress */
+
+ log_warn(LD_CONTROL, "Problem bootstrapping. Stuck at %d%%: %s. (%s; %s)",
+ status, summary, warn,
+ or_conn_end_reason_to_string(reason));
+ control_event_client_status(LOG_WARN,
+ "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\" WARNING=\"%s\" %s",
+ bootstrap_percent, tag, summary, warn,
+ or_conn_end_reason_to_string(reason));
}
Modified: tor/trunk/src/or/directory.c
===================================================================
--- tor/trunk/src/or/directory.c 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/directory.c 2008-06-09 18:32:43 UTC (rev 15091)
@@ -2186,7 +2186,8 @@
case DIR_PURPOSE_FETCH_CONSENSUS: kind = "dl/consensus"; break;
case DIR_PURPOSE_FETCH_CERTIFICATE: kind = "dl/cert"; break;
case DIR_PURPOSE_FETCH_STATUS_VOTE: kind = "dl/vote"; break;
- case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig"; break;
+ case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig";
+ break;
case DIR_PURPOSE_FETCH_SERVERDESC: kind = "dl/server"; break;
case DIR_PURPOSE_FETCH_EXTRAINFO: kind = "dl/extra"; break;
case DIR_PURPOSE_UPLOAD_DIR: kind = "dl/ul-dir"; break;
Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h 2008-06-09 17:07:53 UTC (rev 15090)
+++ tor/trunk/src/or/or.h 2008-06-09 18:32:43 UTC (rev 15091)
@@ -905,6 +905,9 @@
tor_tls_t *tls; /**< TLS connection state. */
int tls_error; /**< Last tor_tls error code. */
+ /* XXX either merge this with tls_error, or do all our activity right
+ * when we compute this value so we don't have to store it. */
+ int socket_error; /**< If conn dies, remember why. */
/** When we last used this conn for any client traffic. If not
* recent, we can rate limit it further. */
time_t client_used;
@@ -3035,7 +3038,8 @@
BOOTSTRAP_STATUS_DONE=100
} bootstrap_status_t;
-int control_event_bootstrap(bootstrap_status_t status, int percent);
+void control_event_bootstrap(bootstrap_status_t status, int progress);
+void control_event_bootstrap_problem(const char *warn, int reason);
#ifdef CONTROL_PRIVATE
/* Used only by control.c and test.c */