[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [tor/master] Remove smartlist_choose_node_by_bandwidth()



commit 415a841378db2489c525ea0c55b169bd2894e992
Author: Nick Mathewson <nickm@xxxxxxxxxxxxxx>
Date:   Mon Nov 3 13:30:19 2014 -0500

    Remove smartlist_choose_node_by_bandwidth()
    
    We were only using it when smartlist_choose_node_by_bandwidth_weights
    failed.  But that function could only fail in the presence of
    buggy/ancient authorities or in the absence of a consensus.  Either
    way, it's better to use sensible defaults and a nicer algorithm.
---
 changes/bug13126    |   10 +++
 src/or/or.h         |    2 +-
 src/or/routerlist.c |  236 +++------------------------------------------------
 3 files changed, 25 insertions(+), 223 deletions(-)

diff --git a/changes/bug13126 b/changes/bug13126
new file mode 100644
index 0000000..45d22ee
--- /dev/null
+++ b/changes/bug13126
@@ -0,0 +1,10 @@
+  o Code simplification and refactoring:
+
+    - Remove our old, non-weighted bandwidth-based node selection code.
+      Previously, we used it as a fallback when we couldn't perform
+      weighted bandwidth-based node selection.  But that would only
+      happen in the cases where we had no consensus, or when we had a
+      consensus generated by buggy or ancient directory authorities.  In
+      either case, it's better to use the more modern, better maintained
+      algorithm, with reasonable defaults for the weights. Closes
+      ticket 13126.
\ No newline at end of file
diff --git a/src/or/or.h b/src/or/or.h
index 6170c21..a681def 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -5006,7 +5006,7 @@ typedef enum was_router_added_t {
   ROUTER_NOT_IN_CONSENSUS_OR_NETWORKSTATUS = -4,
   ROUTER_AUTHDIR_REJECTS = -5,
   ROUTER_WAS_NOT_WANTED = -6,
-  ROUTER_WAS_TOO_OLD = -7,
+  ROUTER_WAS_TOO_OLD = -7, /* note contrast with 'NOT_NEW' */
 } was_router_added_t;
 
 /********************************* routerparse.c ************************/
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index d81dae4..46010ea 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -2030,9 +2030,10 @@ compute_weighted_bandwidths(const smartlist_t *sl,
   if (Wg < 0 || Wm < 0 || We < 0 || Wd < 0 || Wgb < 0 || Wmb < 0 || Wdb < 0
       || Web < 0) {
     log_debug(LD_CIRC,
-              "Got negative bandwidth weights. Defaulting to old selection"
+              "Got negative bandwidth weights. Defaulting to naive selection"
               " algorithm.");
-    return -1; // Use old algorithm.
+    Wg = Wm = We = Wd = weight_scale;
+    Wgb = Wmb = Web = Wdb = weight_scale;
   }
 
   Wg /= weight_scale;
@@ -2048,6 +2049,7 @@ compute_weighted_bandwidths(const smartlist_t *sl,
   bandwidths = tor_calloc(smartlist_len(sl), sizeof(u64_dbl_t));
 
   // Cycle through smartlist and total the bandwidth.
+  static int warned_missing_bw = 0;
   SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
     int is_exit = 0, is_guard = 0, is_dir = 0, this_bw = 0;
     double weight = 1;
@@ -2056,15 +2058,18 @@ compute_weighted_bandwidths(const smartlist_t *sl,
     is_dir = node_is_dir(node);
     if (node->rs) {
       if (!node->rs->has_bandwidth) {
-        tor_free(bandwidths);
         /* This should never happen, unless all the authorites downgrade
          * to 0.2.0 or rogue routerstatuses get inserted into our consensus. */
-        log_warn(LD_BUG,
-                 "Consensus is not listing bandwidths. Defaulting back to "
-                 "old router selection algorithm.");
-        return -1;
+        if (! warned_missing_bw) {
+          log_warn(LD_BUG,
+                 "Consensus is missing some bandwidths. Using a naive "
+                 "router selection algorithm");
+          warned_missing_bw = 1;
+        }
+        this_bw = 30000; /* Chosen arbitrarily */
+      } else {
+        this_bw = kb_to_bytes(node->rs->bandwidth_kb);
       }
-      this_bw = kb_to_bytes(node->rs->bandwidth_kb);
     } else if (node->ri) {
       /* bridge or other descriptor not in our consensus */
       this_bw = bridge_get_advertised_bandwidth_bounded(node->ri);
@@ -2141,226 +2146,13 @@ frac_nodes_with_descriptors(const smartlist_t *sl,
   return present / total;
 }
 
-/** Helper function:
- * choose a random node_t element of smartlist <b>sl</b>, weighted by
- * the advertised bandwidth of each element.
- *
- * If <b>rule</b>==WEIGHT_FOR_EXIT. we're picking an exit node: consider all
- * nodes' bandwidth equally regardless of their Exit status, since there may
- * be some in the list because they exit to obscure ports. If
- * <b>rule</b>==NO_WEIGHTING, we're picking a non-exit node: weight
- * exit-node's bandwidth less depending on the smallness of the fraction of
- * Exit-to-total bandwidth.  If <b>rule</b>==WEIGHT_FOR_GUARD, we're picking a
- * guard node: consider all guard's bandwidth equally. Otherwise, weight
- * guards proportionally less.
- */
-static const node_t *
-smartlist_choose_node_by_bandwidth(const smartlist_t *sl,
-                                   bandwidth_weight_rule_t rule)
-{
-  unsigned int i;
-  u64_dbl_t *bandwidths;
-  int is_exit;
-  int is_guard;
-  int is_fast;
-  double total_nonexit_bw = 0, total_exit_bw = 0;
-  double total_nonguard_bw = 0, total_guard_bw = 0;
-  double exit_weight;
-  double guard_weight;
-  int n_unknown = 0;
-  bitarray_t *fast_bits;
-  bitarray_t *exit_bits;
-  bitarray_t *guard_bits;
-
-  // This function does not support WEIGHT_FOR_DIR
-  // or WEIGHT_FOR_MID
-  if (rule == WEIGHT_FOR_DIR || rule == WEIGHT_FOR_MID) {
-    rule = NO_WEIGHTING;
-  }
-
-  /* Can't choose exit and guard at same time */
-  tor_assert(rule == NO_WEIGHTING ||
-             rule == WEIGHT_FOR_EXIT ||
-             rule == WEIGHT_FOR_GUARD);
-
-  if (smartlist_len(sl) == 0) {
-    log_info(LD_CIRC,
-             "Empty routerlist passed in to old node selection for rule %s",
-             bandwidth_weight_rule_to_string(rule));
-    return NULL;
-  }
-
-  /* First count the total bandwidth weight, and make a list
-   * of each value.  We use UINT64_MAX to indicate "unknown". */
-  bandwidths = tor_calloc(smartlist_len(sl), sizeof(u64_dbl_t));
-  fast_bits = bitarray_init_zero(smartlist_len(sl));
-  exit_bits = bitarray_init_zero(smartlist_len(sl));
-  guard_bits = bitarray_init_zero(smartlist_len(sl));
-
-  /* Iterate over all the routerinfo_t or routerstatus_t, and */
-  SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
-    /* first, learn what bandwidth we think i has */
-    int is_known = 1;
-    uint32_t this_bw = 0;
-    i = node_sl_idx;
-
-    is_exit = node_is_good_exit(node);
-    is_guard = node->is_possible_guard;
-    if (node->rs) {
-      if (node->rs->has_bandwidth) {
-        this_bw = kb_to_bytes(node->rs->bandwidth_kb);
-      } else { /* guess */
-        is_known = 0;
-      }
-    } else if (node->ri) {
-      /* Must be a bridge if we're willing to use it */
-      this_bw = bridge_get_advertised_bandwidth_bounded(node->ri);
-    }
-
-    if (is_exit)
-      bitarray_set(exit_bits, i);
-    if (is_guard)
-      bitarray_set(guard_bits, i);
-    if (node->is_fast)
-      bitarray_set(fast_bits, i);
-
-    if (is_known) {
-      bandwidths[i].dbl = this_bw;
-      if (is_guard)
-        total_guard_bw += this_bw;
-      else
-        total_nonguard_bw += this_bw;
-      if (is_exit)
-        total_exit_bw += this_bw;
-      else
-        total_nonexit_bw += this_bw;
-    } else {
-      ++n_unknown;
-      bandwidths[i].dbl = -1.0;
-    }
-  } SMARTLIST_FOREACH_END(node);
-
-#define EPSILON .1
-
-  /* Now, fill in the unknown values. */
-  if (n_unknown) {
-    int32_t avg_fast, avg_slow;
-    if (total_exit_bw+total_nonexit_bw < EPSILON) {
-      /* if there's some bandwidth, there's at least one known router,
-       * so no worries about div by 0 here */
-      int n_known = smartlist_len(sl)-n_unknown;
-      avg_fast = avg_slow = (int32_t)
-        ((total_exit_bw+total_nonexit_bw)/((uint64_t) n_known));
-    } else {
-      avg_fast = 40000;
-      avg_slow = 20000;
-    }
-    for (i=0; i<(unsigned)smartlist_len(sl); ++i) {
-      if (bandwidths[i].dbl >= 0.0)
-        continue;
-      is_fast = bitarray_is_set(fast_bits, i);
-      is_exit = bitarray_is_set(exit_bits, i);
-      is_guard = bitarray_is_set(guard_bits, i);
-      bandwidths[i].dbl = is_fast ? avg_fast : avg_slow;
-      if (is_exit)
-        total_exit_bw += bandwidths[i].dbl;
-      else
-        total_nonexit_bw += bandwidths[i].dbl;
-      if (is_guard)
-        total_guard_bw += bandwidths[i].dbl;
-      else
-        total_nonguard_bw += bandwidths[i].dbl;
-    }
-  }
-
-  /* If there's no bandwidth at all, pick at random. */
-  if (total_exit_bw+total_nonexit_bw < EPSILON) {
-    tor_free(bandwidths);
-    tor_free(fast_bits);
-    tor_free(exit_bits);
-    tor_free(guard_bits);
-    return smartlist_choose(sl);
-  }
-
-  /* Figure out how to weight exits and guards */
-  {
-    double all_bw = U64_TO_DBL(total_exit_bw+total_nonexit_bw);
-    double exit_bw = U64_TO_DBL(total_exit_bw);
-    double guard_bw = U64_TO_DBL(total_guard_bw);
-    /*
-     * For detailed derivation of this formula, see
-     *   http://archives.seul.org/or/dev/Jul-2007/msg00056.html
-     */
-    if (rule == WEIGHT_FOR_EXIT || total_exit_bw<EPSILON)
-      exit_weight = 1.0;
-    else
-      exit_weight = 1.0 - all_bw/(3.0*exit_bw);
-
-    if (rule == WEIGHT_FOR_GUARD || total_guard_bw<EPSILON)
-      guard_weight = 1.0;
-    else
-      guard_weight = 1.0 - all_bw/(3.0*guard_bw);
-
-    if (exit_weight <= 0.0)
-      exit_weight = 0.0;
-
-    if (guard_weight <= 0.0)
-      guard_weight = 0.0;
-
-    for (i=0; i < (unsigned)smartlist_len(sl); i++) {
-      tor_assert(bandwidths[i].dbl >= 0.0);
-
-      is_exit = bitarray_is_set(exit_bits, i);
-      is_guard = bitarray_is_set(guard_bits, i);
-      if (is_exit && is_guard)
-        bandwidths[i].dbl *= exit_weight * guard_weight;
-      else if (is_guard)
-        bandwidths[i].dbl *= guard_weight;
-      else if (is_exit)
-        bandwidths[i].dbl *= exit_weight;
-    }
-  }
-
-#if 0
-  log_debug(LD_CIRC, "Total weighted bw = "U64_FORMAT
-            ", exit bw = "U64_FORMAT
-            ", nonexit bw = "U64_FORMAT", exit weight = %f "
-            "(for exit == %d)"
-            ", guard bw = "U64_FORMAT
-            ", nonguard bw = "U64_FORMAT", guard weight = %f "
-            "(for guard == %d)",
-            U64_PRINTF_ARG(total_bw),
-            U64_PRINTF_ARG(total_exit_bw), U64_PRINTF_ARG(total_nonexit_bw),
-            exit_weight, (int)(rule == WEIGHT_FOR_EXIT),
-            U64_PRINTF_ARG(total_guard_bw), U64_PRINTF_ARG(total_nonguard_bw),
-            guard_weight, (int)(rule == WEIGHT_FOR_GUARD));
-#endif
-
-  scale_array_elements_to_u64(bandwidths, smartlist_len(sl), NULL);
-
-  {
-    int idx = choose_array_element_by_weight(bandwidths,
-                                             smartlist_len(sl));
-    tor_free(bandwidths);
-    tor_free(fast_bits);
-    tor_free(exit_bits);
-    tor_free(guard_bits);
-    return idx < 0 ? NULL : smartlist_get(sl, idx);
-  }
-}
-
 /** Choose a random element of status list <b>sl</b>, weighted by
  * the advertised bandwidth of each node */
 const node_t *
 node_sl_choose_by_bandwidth(const smartlist_t *sl,
                             bandwidth_weight_rule_t rule)
 { /*XXXX MOVE */
-  const node_t *ret;
-  if ((ret = smartlist_choose_node_by_bandwidth_weights(sl, rule))) {
-    return ret;
-  } else {
-    return smartlist_choose_node_by_bandwidth(sl, rule);
-  }
+  return smartlist_choose_node_by_bandwidth_weights(sl, rule);
 }
 
 /** Return a random running node from the nodelist. Never



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits