[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r10210: Partial backport candidate: We had a bug where we were downl (in tor/trunk: . src/or)



Author: nickm
Date: 2007-05-18 17:19:53 -0400 (Fri, 18 May 2007)
New Revision: 10210

Modified:
   tor/trunk/
   tor/trunk/ChangeLog
   tor/trunk/src/or/directory.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/routerlist.c
Log:
 r12982@Kushana:  nickm | 2007-05-18 15:15:14 -0400
 Partial backport candidate: We had a bug where we were downloading descriptors by descriptor digest, but trying to look them up by identity fingerprint when updating their failure count and next retry time.  (Also use correct backoff logic for extrainfo code.)  Needs testing, doubtless.



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r12982] on c95137ef-5f19-0410-b913-86e773d04f59

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2007-05-18 21:19:19 UTC (rev 10209)
+++ tor/trunk/ChangeLog	2007-05-18 21:19:53 UTC (rev 10210)
@@ -47,6 +47,9 @@
     - If all of our dirservers have given us bad or no networkstatuses 
       lately, then stop hammering them once per minute even when we
       think they're failed. Fixes another part of bug 422.
+    - Back off correctly when downloading servers.  (Previously, we would
+      never actually increment the failure count for descriptors we were in
+      the process of retrieving.)
 
   o Minor fixes (resource management):
     - Count the number of open sockets separately from the number

Modified: tor/trunk/src/or/directory.c
===================================================================
--- tor/trunk/src/or/directory.c	2007-05-18 21:19:19 UTC (rev 10209)
+++ tor/trunk/src/or/directory.c	2007-05-18 21:19:53 UTC (rev 10210)
@@ -2130,52 +2130,59 @@
                                int was_extrainfo)
 {
   char digest[DIGEST_LEN];
-  local_routerstatus_t *rs;
   time_t now = time(NULL);
   int server = server_mode(get_options()) && get_options()->DirPort;
-  (void) was_extrainfo;
   SMARTLIST_FOREACH(failed, const char *, cp,
   {
+    download_status_t *dls = NULL;
     base16_decode(digest, DIGEST_LEN, cp, strlen(cp));
-    /* XXXX020 BUG BUG BUG. Fails miserably when requesting by desc digest
-     * rather than by identity digest. */
-    rs = router_get_combined_status_by_digest(digest);
-    if (!rs || rs->n_download_failures >= MAX_ROUTERDESC_DOWNLOAD_FAILURES)
+    if (was_extrainfo) {
+      signed_descriptor_t *sd =
+        router_get_by_extrainfo_digest(digest);
+      if (sd)
+        dls = &sd->ei_dl_status;
+    } else {
+      local_routerstatus_t *rs =
+        router_get_combined_status_by_descriptor_digest(digest);
+      if (rs)
+        dls = &rs->dl_status;
+    }
+    if (!dls || dls->n_download_failures >= MAX_ROUTERDESC_DOWNLOAD_FAILURES)
       continue;
     if (status_code != 503 || server)
-      ++rs->n_download_failures;
+      ++dls->n_download_failures;
     if (server) {
-      switch (rs->n_download_failures) {
-        case 0: rs->next_attempt_at = 0; break;
-        case 1: rs->next_attempt_at = 0; break;
-        case 2: rs->next_attempt_at = 0; break;
-        case 3: rs->next_attempt_at = now+60; break;
-        case 4: rs->next_attempt_at = now+60; break;
-        case 5: rs->next_attempt_at = now+60*2; break;
-        case 6: rs->next_attempt_at = now+60*5; break;
-        case 7: rs->next_attempt_at = now+60*15; break;
-        default: rs->next_attempt_at = TIME_MAX; break;
+      switch (dls->n_download_failures) {
+        case 0: dls->next_attempt_at = 0; break;
+        case 1: dls->next_attempt_at = 0; break;
+        case 2: dls->next_attempt_at = 0; break;
+        case 3: dls->next_attempt_at = now+60; break;
+        case 4: dls->next_attempt_at = now+60; break;
+        case 5: dls->next_attempt_at = now+60*2; break;
+        case 6: dls->next_attempt_at = now+60*5; break;
+        case 7: dls->next_attempt_at = now+60*15; break;
+        default: dls->next_attempt_at = TIME_MAX; break;
       }
     } else {
-      switch (rs->n_download_failures) {
-        case 0: rs->next_attempt_at = 0; break;
-        case 1: rs->next_attempt_at = 0; break;
-        case 2: rs->next_attempt_at = now+60; break;
-        case 3: rs->next_attempt_at = now+60*5; break;
-        case 4: rs->next_attempt_at = now+60*10; break;
-        default: rs->next_attempt_at = TIME_MAX; break;
+      switch (dls->n_download_failures) {
+        case 0: dls->next_attempt_at = 0; break;
+        case 1: dls->next_attempt_at = 0; break;
+        case 2: dls->next_attempt_at = now+60; break;
+        case 3: dls->next_attempt_at = now+60*5; break;
+        case 4: dls->next_attempt_at = now+60*10; break;
+        default: dls->next_attempt_at = TIME_MAX; break;
       }
     }
-    if (rs->next_attempt_at == 0)
+    if (dls->next_attempt_at == 0)
       log_debug(LD_DIR, "%s failed %d time(s); I'll try again immediately.",
-                cp, (int)rs->n_download_failures);
-    else if (rs->next_attempt_at < TIME_MAX)
+                cp, (int)dls->n_download_failures);
+    else if (dls->next_attempt_at < TIME_MAX)
       log_debug(LD_DIR, "%s failed %d time(s); I'll try again in %d seconds.",
-                cp, (int)rs->n_download_failures,
-                (int)(rs->next_attempt_at-now));
+                cp, (int)dls->n_download_failures,
+                (int)(dls->next_attempt_at-now));
     else
       log_debug(LD_DIR, "%s failed %d time(s); Giving up for a while.",
-                cp, (int)rs->n_download_failures);
+                cp, (int)dls->n_download_failures);
   });
 
   /* No need to relaunch descriptor downloads here: we already do it

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2007-05-18 21:19:19 UTC (rev 10209)
+++ tor/trunk/src/or/or.h	2007-05-18 21:19:53 UTC (rev 10210)
@@ -1049,6 +1049,14 @@
   SAVED_IN_JOURNAL
 } saved_location_t;
 
+/** DOCDOC */
+typedef struct download_status_t {
+  time_t next_attempt_at; /**< When should we try downloading this descriptor
+                           * again? */
+  uint8_t n_download_failures; /**< Number of failures trying to download the
+                                * most recent descriptor. */
+} download_status_t;
+
 /** Information need to cache an onion router's descriptor. */
 typedef struct signed_descriptor_t {
   /** Pointer to the raw server descriptor.  Not necessarily NUL-terminated.
@@ -1062,8 +1070,10 @@
   char identity_digest[DIGEST_LEN];
   /** Declared publication time of the descriptor */
   time_t published_on;
- /** DOCDOC; routerinfo_t only. */
+  /** DOCDOC; routerinfo_t only. */
   char extra_info_digest[DIGEST_LEN];
+  /** DOCDOC; routerinfo_t only: for the corresponding extrainfo. */
+  download_status_t ei_dl_status;
   /** Where is the descriptor saved? */
   saved_location_t saved_location;
   /** If saved_location is SAVED_IN_CACHE or SAVED_IN_JOURNAL, the offset of
@@ -1071,8 +1081,6 @@
   off_t saved_offset;
   /* DOCDOC */
   unsigned int do_not_cache : 1;
-  /* DOCDOC; XXXX020 replace with something smarter. */
-  unsigned int tried_downloading_extrainfo : 1;
 } signed_descriptor_t;
 
 /** Information about another onion router in the network. */
@@ -1219,12 +1227,9 @@
    * descriptor_digest represents the descriptor we would most like to use for
    * this router. */
   routerstatus_t status;
-  time_t next_attempt_at; /**< When should we try downloading this descriptor
-                           * again? */
   time_t last_dir_503_at; /**< When did this router last tell us that it
                            * was too busy to serve directory info? */
-  uint8_t n_download_failures; /**< Number of failures trying to download the
-                                * most recent descriptor. */
+  download_status_t dl_status;
   unsigned int name_lookup_warned:1; /**< Have we warned the user for referring
                                       * to this (unnamed) router by nickname?
                                       */
@@ -1285,6 +1290,8 @@
   /** Map from extra-info digest to a signed_descriptor_t.  Only for
    * routers in routers or old_routers. */
   digestmap_t *extra_info_map;
+  /** DOCDOC */
+  digestmap_t *desc_by_eid_map;
   /** List of routerinfo_t for all currently live routers we know. */
   smartlist_t *routers;
   /** List of signed_descriptor_t for older router descriptors we're
@@ -3107,6 +3114,7 @@
 routerinfo_t *router_get_by_hexdigest(const char *hexdigest);
 routerinfo_t *router_get_by_digest(const char *digest);
 signed_descriptor_t *router_get_by_descriptor_digest(const char *digest);
+signed_descriptor_t *router_get_by_extrainfo_digest(const char *digest);
 signed_descriptor_t *extrainfo_get_by_descriptor_digest(const char *digest);
 const char *signed_descriptor_get_body(signed_descriptor_t *desc);
 int router_digest_version_as_new_as(const char *digest, const char *cutoff);
@@ -3159,6 +3167,9 @@
 int any_trusted_dir_is_v1_authority(void);
 networkstatus_t *networkstatus_get_by_digest(const char *digest);
 local_routerstatus_t *router_get_combined_status_by_digest(const char *digest);
+local_routerstatus_t *router_get_combined_status_by_descriptor_digest(
+                                                          const char *digest);
+
 routerstatus_t *routerstatus_get_by_hexdigest(const char *hexdigest);
 void update_networkstatus_downloads(time_t now);
 void update_router_descriptor_downloads(time_t now);

Modified: tor/trunk/src/or/routerlist.c
===================================================================
--- tor/trunk/src/or/routerlist.c	2007-05-18 21:19:19 UTC (rev 10209)
+++ tor/trunk/src/or/routerlist.c	2007-05-18 21:19:53 UTC (rev 10210)
@@ -58,6 +58,8 @@
 /** Global list of local_routerstatus_t for each router, known or unknown.
  * Kept sorted by digest. */
 static smartlist_t *routerstatus_list = NULL;
+/** DOCDOC */
+static digestmap_t *routerstatus_by_desc_digest_map = NULL;
 
 /** Map from lowercase nickname to digest of named server, if any. */
 static strmap_t *named_server_map = NULL;
@@ -1520,6 +1522,18 @@
   return digestmap_get(routerlist->desc_digest_map, digest);
 }
 
+/** Return the router in our routerlist whose 20-byte descriptor
+ * is <b>digest</b>.  Return NULL if no such router is known. */
+signed_descriptor_t *
+router_get_by_extrainfo_digest(const char *digest)
+{
+  tor_assert(digest);
+
+  if (!routerlist) return NULL;
+
+  return digestmap_get(routerlist->desc_by_eid_map, digest);
+}
+
 /** DOCDOC */
 signed_descriptor_t *
 extrainfo_get_by_descriptor_digest(const char *digest)
@@ -1567,6 +1581,7 @@
     routerlist->old_routers = smartlist_create();
     routerlist->identity_map = digestmap_new();
     routerlist->desc_digest_map = digestmap_new();
+    routerlist->desc_by_eid_map = digestmap_new();
     routerlist->extra_info_map = digestmap_new();
   }
   return routerlist;
@@ -1641,6 +1656,7 @@
   tor_assert(rl);
   digestmap_free(rl->identity_map, NULL);
   digestmap_free(rl->desc_digest_map, NULL);
+  digestmap_free(rl->desc_by_eid_map, NULL);
   digestmap_free(rl->extra_info_map, _extrainfo_free);
   SMARTLIST_FOREACH(rl->routers, routerinfo_t *, r,
                     routerinfo_free(r));
@@ -1726,6 +1742,9 @@
   tor_assert(!ri_old);
   digestmap_set(rl->desc_digest_map, ri->cache_info.signed_descriptor_digest,
                 &(ri->cache_info));
+  if (!tor_digest_is_zero(ri->cache_info.extra_info_digest))
+    digestmap_set(rl->desc_by_eid_map, ri->cache_info.extra_info_digest,
+                  &ri->cache_info);
   smartlist_add(rl->routers, ri);
   ri->routerlist_index = smartlist_len(rl->routers) - 1;
   router_dir_info_changed();
@@ -1793,6 +1812,8 @@
     signed_descriptor_t *sd = signed_descriptor_from_routerinfo(ri);
     digestmap_set(rl->desc_digest_map, sd->signed_descriptor_digest, sd);
     smartlist_add(rl->old_routers, sd);
+    if (!tor_digest_is_zero(sd->extra_info_digest))
+      digestmap_set(rl->desc_by_eid_map, sd->extra_info_digest, sd);
   } else {
     routerinfo_free(ri);
   }
@@ -1839,7 +1860,6 @@
                               ri->cache_info.signed_descriptor_digest);
     tor_assert(ri_tmp == ri);
     router_store_stats.bytes_dropped += ri->cache_info.signed_descriptor_len;
-    routerinfo_free(ri);
     ei_tmp = digestmap_remove(rl->extra_info_map,
                               ri->cache_info.extra_info_digest);
     if (ei_tmp) {
@@ -1847,6 +1867,9 @@
         ei_tmp->cache_info.signed_descriptor_len;
       extrainfo_free(ei_tmp);
     }
+    if (!tor_digest_is_zero(ri->cache_info.extra_info_digest))
+      digestmap_remove(rl->desc_by_eid_map, ri->cache_info.extra_info_digest);
+    routerinfo_free(ri);
   }
   // routerlist_assert_ok(rl);
   routerlist_check_bug_417();
@@ -1867,7 +1890,6 @@
                             sd->signed_descriptor_digest);
   tor_assert(sd_tmp == sd);
   router_store_stats.bytes_dropped += sd->signed_descriptor_len;
-  signed_descriptor_free(sd);
 
   ei_tmp = digestmap_remove(rl->extra_info_map,
                             sd->extra_info_digest);
@@ -1876,7 +1898,10 @@
       ei_tmp->cache_info.signed_descriptor_len;
     extrainfo_free(ei_tmp);
   }
+  if (!tor_digest_is_zero(sd->extra_info_digest))
+    digestmap_remove(rl->desc_by_eid_map, sd->extra_info_digest);
 
+  signed_descriptor_free(sd);
   routerlist_check_bug_417();
   // routerlist_assert_ok(rl);
 }
@@ -1940,6 +1965,9 @@
         extrainfo_free(ei_tmp);
       }
     }
+    if (!tor_digest_is_zero(ri_old->cache_info.extra_info_digest))
+      digestmap_remove(rl->desc_by_eid_map,
+                       ri_old->cache_info.extra_info_digest);
     routerinfo_free(ri_old);
   }
   // routerlist_assert_ok(rl);
@@ -1981,6 +2009,10 @@
     smartlist_free(routerstatus_list);
     routerstatus_list = NULL;
   }
+  if (routerstatus_by_desc_digest_map) {
+    digestmap_free(routerstatus_by_desc_digest_map, NULL);
+    routerstatus_by_desc_digest_map = NULL;
+  }
   if (named_server_map) {
     strmap_free(named_server_map, _tor_free);
   }
@@ -2941,6 +2973,24 @@
                            _compare_digest_to_routerstatus_entry);
 }
 
+/** DOCDOC */
+local_routerstatus_t *
+router_get_combined_status_by_descriptor_digest(const char *digest)
+{
+  if (!routerstatus_by_desc_digest_map)
+    return NULL;
+#if 0
+  /* XXXX020 this could conceivably be critical path when a whole lot
+   * of descriptors fail.  Maybe we should use a digest map instead.*/
+  SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, lrs,
+                    if (!memcmp(lrs->status.descriptor_digest, digest))
+                      return lrs);
+  return NULL;
+#else
+  return digestmap_get(routerstatus_by_desc_digest_map, digest);
+#endif
+}
+
 /** Given a nickname (possibly verbose, possibly a hexadecimal digest), return
  * the corresponding local_routerstatus_t, or NULL if none exists.  Warn the
  * user if <b>warn_if_unnamed</b> is set, and they have specified a router by
@@ -3916,8 +3966,8 @@
     if ((rs_old = router_get_combined_status_by_digest(lowest))) {
       if (!memcmp(rs_out->status.descriptor_digest,
                   most_recent->descriptor_digest, DIGEST_LEN)) {
-        rs_out->n_download_failures = rs_old->n_download_failures;
-        rs_out->next_attempt_at = rs_old->next_attempt_at;
+        rs_out->dl_status.n_download_failures = rs_old->dl_status.n_download_failures;
+        rs_out->dl_status.next_attempt_at = rs_old->dl_status.next_attempt_at;
       }
       rs_out->name_lookup_warned = rs_old->name_lookup_warned;
       rs_out->last_dir_503_at = rs_old->last_dir_503_at;
@@ -3964,6 +4014,14 @@
   smartlist_free(routerstatus_list);
   routerstatus_list = result;
 
+  if (routerstatus_by_desc_digest_map)
+    digestmap_free(routerstatus_by_desc_digest_map, NULL);
+  routerstatus_by_desc_digest_map = digestmap_new();
+  SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, rs,
+                    digestmap_set(routerstatus_by_desc_digest_map,
+                                  rs->status.descriptor_digest,
+                                  rs));
+
   tor_free(networkstatus);
   tor_free(index);
   tor_free(size);
@@ -4019,8 +4077,8 @@
       ds->n_networkstatus_failures = 0;
     }
     if (reset_failures) {
-      rs->n_download_failures = 0;
-      rs->next_attempt_at = 0;
+      rs->dl_status.n_download_failures = 0;
+      rs->dl_status.next_attempt_at = 0;
     }
   });
   router_dir_info_changed();
@@ -4165,7 +4223,7 @@
       /* Oddly, we have a descriptor more recent than the 'best' one, but it
          was once best. So that's okay. */
       ++n_uptodate;
-    } else if (rs->next_attempt_at > now) {
+    } else if (rs->dl_status.next_attempt_at > now) {
       /* We failed too recently to try again. */
       ++n_not_ready;
     } else {
@@ -4455,13 +4513,12 @@
 static INLINE int
 should_download_extrainfo(signed_descriptor_t *sd,
                           const routerlist_t *rl,
-                          const digestmap_t *pending)
+                          const digestmap_t *pending,
+                          time_t now)
 {
   const char *d = sd->extra_info_digest;
-  /* XXXX020 Check for failures; keep a failure count.  Don't just
-   * do this dumb "try once and give up" thing. */
-  return (!sd->tried_downloading_extrainfo &&
-          !tor_digest_is_zero(d) &&
+  return (!tor_digest_is_zero(d) &&
+          sd->ei_dl_status.next_attempt_at <= now &&
           !digestmap_get(rl->extra_info_map, d) &&
           !digestmap_get(pending, d));
 }
@@ -4475,7 +4532,6 @@
   smartlist_t *wanted;
   digestmap_t *pending;
   int i;
-  (void) now;
   if (! options->DownloadExtraInfo)
     return;
 
@@ -4484,16 +4540,14 @@
   rl = router_get_routerlist();
   wanted = smartlist_create();
   SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
-      if (should_download_extrainfo(&ri->cache_info, rl, pending)) {
+      if (should_download_extrainfo(&ri->cache_info, rl, pending, now)) {
         smartlist_add(wanted, ri->cache_info.extra_info_digest);
-        ri->cache_info.tried_downloading_extrainfo = 1; /*XXXX020 be smarter.*/
       }
     });
   if (options->DirPort) {
     SMARTLIST_FOREACH(rl->old_routers, signed_descriptor_t *, sd, {
-        if (should_download_extrainfo(sd, rl, pending)) {
+        if (should_download_extrainfo(sd, rl, pending, now)) {
           smartlist_add(wanted, sd->extra_info_digest);
-          sd->tried_downloading_extrainfo = 1; /*XXXX020 be smarter. */
         }
       });
   }
@@ -4637,9 +4691,10 @@
     return;
   SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, rs,
   {
-    rs->n_download_failures = 0;
-    rs->next_attempt_at = 0;
+    rs->dl_status.n_download_failures = 0;
+    rs->dl_status.next_attempt_at = 0;
   });
+  /* XXXX020 reset extrainfo dl status too. */
   tor_assert(networkstatus_list);
   SMARTLIST_FOREACH(networkstatus_list, networkstatus_t *, ns,
      SMARTLIST_FOREACH(ns->entries, routerstatus_t *, rs,