[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r16941: {tor} More logging for mtbf/wfu calculations. (in tor/trunk: . src/common src/or)
Author: nickm
Date: 2008-09-23 14:24:20 -0400 (Tue, 23 Sep 2008)
New Revision: 16941
Modified:
tor/trunk/ChangeLog
tor/trunk/src/common/log.c
tor/trunk/src/common/log.h
tor/trunk/src/or/rephist.c
Log:
More logging for mtbf/wfu calculations.
Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog 2008-09-23 09:51:05 UTC (rev 16940)
+++ tor/trunk/ChangeLog 2008-09-23 18:24:20 UTC (rev 16941)
@@ -46,6 +46,8 @@
default (i.e. new default value for HidServDirectoryV2 is 1).
- Serve the latest v3 networkstatus consensus via the control
port. Use "getinfo dir/status-vote/current/consensus" to fetch it.
+ - Better logging about stability/reliability calculations on directory
+ servers.
o Code simplifications and refactoring:
- Revise the connection_new functions so that a more typesafe variant
Modified: tor/trunk/src/common/log.c
===================================================================
--- tor/trunk/src/common/log.c 2008-09-23 09:51:05 UTC (rev 16940)
+++ tor/trunk/src/common/log.c 2008-09-23 18:24:20 UTC (rev 16941)
@@ -718,7 +718,7 @@
static const char *domain_list[] = {
"GENERAL", "CRYPTO", "NET", "CONFIG", "FS", "PROTOCOL", "MM",
"HTTP", "APP", "CONTROL", "CIRC", "REND", "BUG", "DIR", "DIRSERV",
- "OR", "EDGE", "ACCT", NULL
+ "OR", "EDGE", "ACCT", "HIST", NULL
};
/** DOCDOC */
Modified: tor/trunk/src/common/log.h
===================================================================
--- tor/trunk/src/common/log.h 2008-09-23 09:51:05 UTC (rev 16940)
+++ tor/trunk/src/common/log.h 2008-09-23 18:24:20 UTC (rev 16941)
@@ -90,9 +90,11 @@
#define LD_EXIT LD_EDGE
/** Bandwidth accounting. */
#define LD_ACCT (1u<<17)
+/** Router history */
+#define LD_HIST (1u<<18)
/** Number of logging domains in the code. */
-#define N_LOGGING_DOMAINS 18
+#define N_LOGGING_DOMAINS 19
typedef uint32_t log_domain_mask_t;
Modified: tor/trunk/src/or/rephist.c
===================================================================
--- tor/trunk/src/or/rephist.c 2008-09-23 09:51:05 UTC (rev 16940)
+++ tor/trunk/src/or/rephist.c 2008-09-23 18:24:20 UTC (rev 16941)
@@ -284,21 +284,41 @@
hist->changed = when;
}
-/** We have just decided that this router is reachable, meaning
- * we will give it a "Running" flag for the next while. */
+/** We have just decided that this router with identity digest <b>id</b> is
+ * reachable, meaning we will give it a "Running" flag for the next while. */
void
rep_hist_note_router_reachable(const char *id, time_t when)
{
or_history_t *hist = get_or_history(id);
+ int was_in_run = 1;
+ char tbuf[ISO_TIME_LEN+1];
+
if (!started_tracking_stability)
started_tracking_stability = time(NULL);
if (hist && !hist->start_of_run) {
hist->start_of_run = when;
+ was_in_run = 0;
}
if (hist && hist->start_of_downtime) {
- long down_length = when - hist->start_of_downtime;
+ long down_length;
+
+ format_local_iso_time(tbuf, hist->start_of_downtime);
+ log_info(LD_HIST, "Router %s is now Running; it had been down since %s.",
+ hex_str(id, DIGEST_LEN), tbuf);
+ if (was_in_run)
+ log_info(LD_HIST, " (Paradoxically, it was already Running too.)");
+
+ down_length = when - hist->start_of_downtime;
hist->total_weighted_time += down_length;
hist->start_of_downtime = 0;
+ } else {
+ format_local_iso_time(tbuf, hist->start_of_run);
+ if (was_in_run)
+ log_debug(LD_HIST, "Router %s is still Running; it has been Running "
+ "since %s", hex_str(id, DIGEST_LEN), tbuf);
+ else
+ log_info(LD_HIST, "Router %s is now Running; it was previously untracked",
+ hex_str(id, DIGEST_LEN));
}
}
@@ -308,21 +328,41 @@
rep_hist_note_router_unreachable(const char *id, time_t when)
{
or_history_t *hist = get_or_history(id);
+ char tbuf[ISO_TIME_LEN+1];
+ int was_running = 0;
if (!started_tracking_stability)
started_tracking_stability = time(NULL);
if (hist && hist->start_of_run) {
/*XXXX We could treat failed connections differently from failed
* connect attempts. */
long run_length = when - hist->start_of_run;
+ format_local_iso_time(tbuf, hist->start_of_run);
+
hist->weighted_run_length += run_length;
hist->total_run_weights += 1.0;
hist->start_of_run = 0;
-
hist->weighted_uptime += run_length;
hist->total_weighted_time += run_length;
+
+ was_running = 1;
+ log_info(LD_HIST, "Router %s is now non-Running: it had previously been "
+ "Running since %s. Its total weighted uptime is %lu/%lu.",
+ hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime,
+ hist->total_weighted_time);
}
if (hist && !hist->start_of_downtime) {
hist->start_of_downtime = when;
+
+ if (!was_running)
+ log_info(LD_HIST, "Router %s is now non-Running; it was previously "
+ "untracked.", hex_str(id, DIGEST_LEN));
+ } else {
+ if (!was_running) {
+ format_local_iso_time(tbuf, hist->start_of_downtime);
+
+ log_info(LD_HIST, "Router %s is still non-Running; it has been "
+ "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf);
+ }
}
}
@@ -350,6 +390,9 @@
alpha *= STABILITY_ALPHA;
}
+ log_info(LD_HIST, "Discounting all old stability info by a factor of %lf",
+ alpha);
+
/* Multiply every w_r_l, t_r_w pair by alpha. */
for (orhist_it = digestmap_iter_init(history_map);
!digestmap_iter_done(orhist_it);
@@ -526,7 +569,7 @@
rep_history_clean(now - get_options()->RephistTrackTime);
- log(severity, LD_GENERAL, "--------------- Dumping history information:");
+ log(severity, LD_HIST, "--------------- Dumping history information:");
for (orhist_it = digestmap_iter_init(history_map);
!digestmap_iter_done(orhist_it);
@@ -551,7 +594,7 @@
} else {
uptime=1.0;
}
- log(severity, LD_GENERAL,
+ log(severity, LD_HIST,
"OR %s [%s]: %ld/%ld good connections; uptime %ld/%ld sec (%.2f%%); "
"wmtbf %lu:%02lu:%02lu",
name1, hexdigest1,
@@ -582,7 +625,7 @@
else
len += ret;
}
- log(severity, LD_GENERAL, "%s", buffer);
+ log(severity, LD_HIST, "%s", buffer);
}
}
}
@@ -815,7 +858,7 @@
}
}
if (format != 1 && format != 2) {
- log_warn(LD_GENERAL,
+ log_warn(LD_HIST,
"Unrecognized format in mtbf history file. Skipping.");
goto err;
}
@@ -825,17 +868,17 @@
break;
if (!strcmpstart(line, "last-downrated ")) {
if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0)
- log_warn(LD_GENERAL,"Couldn't parse downrate time in mtbf "
+ log_warn(LD_HIST,"Couldn't parse downrate time in mtbf "
"history file.");
}
if (!strcmpstart(line, "stored-at ")) {
if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0)
- log_warn(LD_GENERAL,"Couldn't parse stored time in mtbf "
+ log_warn(LD_HIST,"Couldn't parse stored time in mtbf "
"history file.");
}
if (!strcmpstart(line, "tracked-since ")) {
if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0)
- log_warn(LD_GENERAL,"Couldn't parse started-tracking time in mtbf "
+ log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf "
"history file.");
}
}
@@ -845,7 +888,7 @@
tracked_since = now;
if (!stored_at) {
- log_warn(LD_GENERAL, "No stored time recorded.");
+ log_warn(LD_HIST, "No stored time recorded.");
goto err;
}
@@ -878,7 +921,7 @@
n = sscanf(line, "%40s %ld %lf S=%10s %8s",
hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11);
if (n != 3 && n != 5) {
- log_warn(LD_GENERAL, "Couldn't scan line %s", escaped(line));
+ log_warn(LD_HIST, "Couldn't scan line %s", escaped(line));
continue;
}
have_mtbf = 1;
@@ -897,7 +940,7 @@
if (n == 2 || n == 4) {
have_mtbf = 1;
} else {
- log_warn(LD_GENERAL, "Couldn't scan +MTBF line %s",
+ log_warn(LD_HIST, "Couldn't scan +MTBF line %s",
escaped(mtbfline));
}
}
@@ -909,7 +952,7 @@
if (n == 2 || n == 4) {
have_wfu = 1;
} else {
- log_warn(LD_GENERAL, "Couldn't scan +WFU line %s", escaped(wfuline));
+ log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline));
}
}
if (wfu_idx > i)
@@ -918,7 +961,7 @@
i = mtbf_idx;
}
if (base16_decode(digest, DIGEST_LEN, hexbuf, HEX_DIGEST_LEN) < 0) {
- log_warn(LD_GENERAL, "Couldn't hex string %s", escaped(hexbuf));
+ log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf));
continue;
}
hist = get_or_history(digest);
@@ -929,7 +972,7 @@
if (mtbf_timebuf[0]) {
mtbf_timebuf[10] = ' ';
if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0)
- log_warn(LD_GENERAL, "Couldn't parse time %s",
+ log_warn(LD_HIST, "Couldn't parse time %s",
escaped(mtbf_timebuf));
}
hist->start_of_run = correct_time(start_of_run, now, stored_at,
@@ -944,7 +987,7 @@
if (wfu_timebuf[0]) {
wfu_timebuf[10] = ' ';
if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0)
- log_warn(LD_GENERAL, "Couldn't parse time %s", escaped(wfu_timebuf));
+ log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf));
}
}
hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at,
@@ -953,7 +996,7 @@
hist->total_weighted_time = total_wt_time;
}
if (strcmp(line, "."))
- log_warn(LD_GENERAL, "Truncated MTBF file.");
+ log_warn(LD_HIST, "Truncated MTBF file.");
if (tracked_since < 86400*365) /* Recover from insanely early value. */
tracked_since = latest_possible_start;
@@ -1335,7 +1378,7 @@
v = tor_parse_uint64(cp, 10, 0, UINT64_MAX, &ok, NULL);
if (!ok) {
all_ok=0;
- log_notice(LD_GENERAL, "Could not parse '%s' into a number.'", cp);
+ log_notice(LD_HIST, "Could not parse '%s' into a number.'", cp);
}
if (start < now) {
add_obs(b, start, v);
@@ -1617,7 +1660,7 @@
void
dump_pk_ops(int severity)
{
- log(severity, LD_GENERAL,
+ log(severity, LD_HIST,
"PK operations: %lu directory objects signed, "
"%lu directory objects verified, "
"%lu routerdescs signed, "