[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [tor/master] Add functions to serve microdescs and flavored consensuses.



Author: Nick Mathewson <nickm@xxxxxxxxxxxxxx>
Date: Sun, 18 Oct 2009 15:45:57 -0400
Subject: Add functions to serve microdescs and flavored consensuses.
Commit: bb22d8fc45f143666f0ee676e0aeae200104a421

---
 src/or/directory.c  |  166 +++++++++++++++++++++++++++++++++++++--------------
 src/or/dirserv.c    |   93 +++++++++++++++++++++++++---
 src/or/microdesc.c  |   35 +++++++++++
 src/or/or.h         |   19 +++++-
 src/or/routerlist.c |    2 +-
 src/test/test_dir.c |  131 +++++++++++++++++++++++++++++++++++++++-
 6 files changed, 385 insertions(+), 61 deletions(-)

diff --git a/src/or/directory.c b/src/or/directory.c
index acee78f..01647ce 100644
--- a/src/or/directory.c
+++ b/src/or/directory.c
@@ -92,6 +92,7 @@ static void directory_initiate_command_rend(const char *address,
 #define ROUTERDESC_CACHE_LIFETIME (30*60)
 #define ROUTERDESC_BY_DIGEST_CACHE_LIFETIME (48*60*60)
 #define ROBOTS_CACHE_LIFETIME (24*60*60)
+#define MICRODESC_CACHE_LIFETIME (48*60*60)
 
 /********* END VARIABLES ************/
 
@@ -610,7 +611,7 @@ connection_dir_download_networkstatus_failed(dir_connection_t *conn,
      * failed, and possibly retry them later.*/
     smartlist_t *failed = smartlist_create();
     dir_split_resource_into_fingerprints(conn->requested_resource+3,
-                                         failed, NULL, 0, 0);
+                                         failed, NULL, 0);
     if (smartlist_len(failed)) {
       dir_networkstatus_download_failed(failed, status_code);
       SMARTLIST_FOREACH(failed, char *, cp, tor_free(cp));
@@ -647,7 +648,7 @@ connection_dir_download_cert_failed(dir_connection_t *conn, int status)
     return;
   failed = smartlist_create();
   dir_split_resource_into_fingerprints(conn->requested_resource+3,
-                                       failed, NULL, 1, 0);
+                                       failed, NULL, DSR_HEX);
   SMARTLIST_FOREACH(failed, char *, cp,
   {
     authority_cert_dl_failed(cp, status);
@@ -1564,7 +1565,7 @@ connection_dir_client_reached_eof(dir_connection_t *conn)
       source = NS_FROM_DIR_BY_FP;
       which = smartlist_create();
       dir_split_resource_into_fingerprints(conn->requested_resource+3,
-                                           which, NULL, 0, 0);
+                                           which, NULL, 0);
     } else if (conn->requested_resource &&
                !strcmpstart(conn->requested_resource, "all")) {
       source = NS_FROM_DIR_ALL;
@@ -1717,7 +1718,7 @@ connection_dir_client_reached_eof(dir_connection_t *conn)
       which = smartlist_create();
       dir_split_resource_into_fingerprints(conn->requested_resource +
                                              (descriptor_digests ? 2 : 3),
-                                           which, NULL, 0, 0);
+                                           which, NULL, 0);
       n_asked_for = smartlist_len(which);
     }
     if (status_code != 200) {
@@ -2328,7 +2329,7 @@ client_likes_consensus(networkstatus_t *v, const char *want_url)
   int need_at_least;
   int have = 0;
 
-  dir_split_resource_into_fingerprints(want_url, want_authorities, NULL, 0, 0);
+  dir_split_resource_into_fingerprints(want_url, want_authorities, NULL, 0);
   need_at_least = smartlist_len(want_authorities)/2+1;
   SMARTLIST_FOREACH_BEGIN(want_authorities, const char *, d) {
     char want_digest[DIGEST_LEN];
@@ -2504,6 +2505,7 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     const char *request_type = NULL;
     const char *key = url + strlen("/tor/status/");
     long lifetime = NETWORKSTATUS_CACHE_LIFETIME;
+
     if (!is_v3) {
       dirserv_get_networkstatus_v2_fingerprints(dir_fps, key);
       if (!strcmpstart(key, "fp/"))
@@ -2518,19 +2520,44 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     } else {
       networkstatus_t *v = networkstatus_get_latest_consensus();
       time_t now = time(NULL);
+      const char *want_fps = NULL;
+      char *flavor = NULL;
       #define CONSENSUS_URL_PREFIX "/tor/status-vote/current/consensus/"
-      if (v &&
-          !strcmpstart(url, CONSENSUS_URL_PREFIX) &&
-          !client_likes_consensus(v, url + strlen(CONSENSUS_URL_PREFIX))) {
+      #define CONSENSUS_FLAVORED_PREFIX "/tor/status-vote/current/consensus-"
+      /* figure out the flavor if any, and who we wanted to sign the thing */
+      if (!strcmpstart(url, CONSENSUS_FLAVORED_PREFIX)) {
+        const char *f, *cp;
+        f = url + strlen(CONSENSUS_FLAVORED_PREFIX);
+        cp = strchr(f, '/');
+        if (cp) {
+          want_fps = cp+1;
+          flavor = tor_strndup(f, cp-f);
+        } else {
+          flavor = tor_strdup(f);
+        }
+      } else {
+        if (!strcmpstart(url, CONSENSUS_URL_PREFIX))
+          want_fps = url+strlen(CONSENSUS_URL_PREFIX);
+      }
+
+      /* XXXX MICRODESC NM NM should check document of correct flavor */
+      if (v && want_fps &&
+          !client_likes_consensus(v, want_fps)) {
         write_http_status_line(conn, 404, "Consensus not signed by sufficient "
                                           "number of requested authorities");
         smartlist_free(dir_fps);
         geoip_note_ns_response(act, GEOIP_REJECT_NOT_ENOUGH_SIGS);
+        tor_free(flavor);
         goto done;
       }
 
-      smartlist_add(dir_fps, tor_memdup("\0\0\0\0\0\0\0\0\0\0"
-                                        "\0\0\0\0\0\0\0\0\0\0", 20));
+      {
+        char *fp = tor_malloc_zero(DIGEST_LEN);
+        if (flavor)
+          strlcpy(fp, flavor, DIGEST_LEN);
+        tor_free(flavor);
+        smartlist_add(dir_fps, fp);
+      }
       request_type = compressed?"v3.z":"v3";
       lifetime = (v && v->fresh_until > now) ? v->fresh_until - now : 0;
     }
@@ -2644,7 +2671,8 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
         flags = DGV_BY_ID |
           (current ? DGV_INCLUDE_PREVIOUS : DGV_INCLUDE_PENDING);
       }
-      dir_split_resource_into_fingerprints(url, fps, NULL, 1, 1);
+      dir_split_resource_into_fingerprints(url, fps, NULL,
+                                           DSR_HEX|DSR_SORT_UNIQ);
       SMARTLIST_FOREACH(fps, char *, fp, {
           if ((d = dirvote_get_vote(fp, flags)))
             smartlist_add(dir_items, (cached_dir_t*)d);
@@ -2697,6 +2725,41 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     goto done;
   }
 
+  if (!strcmpstart(url, "/tor/micro/d/")) {
+    smartlist_t *fps = smartlist_create();
+    /* The URL tail is a '-'-separated list of base64 256-bit digests. */
+    dir_split_resource_into_fingerprints(url+strlen("/tor/micro/d/"),
+                                      fps, NULL,
+                                      DSR_DIGEST256|DSR_BASE64|DSR_SORT_UNIQ);
+    /* Answer 404 unless we have at least one of the requested microdescs. */
+    if (!dirserv_have_any_microdesc(fps)) {
+      write_http_status_line(conn, 404, "Not found");
+      SMARTLIST_FOREACH(fps, char *, fp, tor_free(fp));
+      smartlist_free(fps);
+      goto done;
+    }
+    dlen = dirserv_estimate_microdesc_size(fps, compressed);
+    if (global_write_bucket_low(TO_CONN(conn), dlen, 2)) {
+      log_info(LD_DIRSERV,
+               "Client asked for microdescriptors, but we've been "
+               "writing too many bytes lately. Sending 503 Dir busy.");
+      write_http_status_line(conn, 503, "Directory busy, try again later");
+      SMARTLIST_FOREACH(fps, char *, fp, tor_free(fp));
+      smartlist_free(fps);
+      goto done;
+    }
+    /* Start spooling; conn->fingerprint_stack takes ownership of fps. */
+    write_http_response_header(conn, -1, compressed, MICRODESC_CACHE_LIFETIME);
+    conn->dir_spool_src = DIR_SPOOL_MICRODESC;
+    conn->fingerprint_stack = fps;
+
+    if (compressed)
+      conn->zlib_state = tor_zlib_new(1, ZLIB_METHOD);
+
+    connection_dirserv_flushed_some(conn);
+    goto done;
+  }
+
   if (!strcmpstart(url,"/tor/server/") ||
       (!options->BridgeAuthoritativeDir &&
        !options->BridgeRelay && !strcmpstart(url,"/tor/extra/"))) {
@@ -2778,7 +2841,8 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     } else if (!strcmpstart(url, "/tor/keys/fp/")) {
       smartlist_t *fps = smartlist_create();
       dir_split_resource_into_fingerprints(url+strlen("/tor/keys/fp/"),
-                                           fps, NULL, 1, 1);
+                                           fps, NULL,
+                                           DSR_HEX|DSR_SORT_UNIQ);
       SMARTLIST_FOREACH(fps, char *, d, {
           authority_cert_t *c = authority_cert_get_newest_by_id(d);
           if (c) smartlist_add(certs, c);
@@ -2788,7 +2852,8 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     } else if (!strcmpstart(url, "/tor/keys/sk/")) {
       smartlist_t *fps = smartlist_create();
       dir_split_resource_into_fingerprints(url+strlen("/tor/keys/sk/"),
-                                           fps, NULL, 1, 1);
+                                           fps, NULL,
+                                           DSR_HEX|DSR_SORT_UNIQ);
       SMARTLIST_FOREACH(fps, char *, d, {
           authority_cert_t *c = authority_cert_get_by_sk_digest(d);
           if (c) smartlist_add(certs, c);
@@ -3523,19 +3588,37 @@ dir_split_resource_into_fingerprint_pairs(const char *res,
 /** Given a directory <b>resource</b> request, containing zero
  * or more strings separated by plus signs, followed optionally by ".z", store
  * the strings, in order, into <b>fp_out</b>.  If <b>compressed_out</b> is
- * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0.  If
- * decode_hex is true, then delete all elements that aren't hex digests, and
- * decode the rest.  If sort_uniq is true, then sort the list and remove
- * all duplicates.
+ * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0.
+ *
+ * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and
+ * decode the rest.  If (flags & DSR_BASE64), then use "-" rather than "+" as
+ * a separator, delete all the elements that aren't base64-encoded digests,
+ * and decode the rest.  If (flags & DSR_DIGEST256), these digests should be
+ * 256 bits long; else they should be 160.
+ *
+ * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates.
  */
 int
 dir_split_resource_into_fingerprints(const char *resource,
                                      smartlist_t *fp_out, int *compressed_out,
-                                     int decode_hex, int sort_uniq)
+                                     int flags)
 {
+  const int decode_hex = flags & DSR_HEX;
+  const int decode_base64 = flags & DSR_BASE64;
+  const int digests_are_256 = flags & DSR_DIGEST256;
+  const int sort_uniq = flags & DSR_SORT_UNIQ;
+
+  const int digest_len = digests_are_256 ? DIGEST256_LEN : DIGEST_LEN;
+  const int hex_digest_len = digests_are_256 ?
+    HEX_DIGEST256_LEN : HEX_DIGEST_LEN;
+  const int base64_digest_len = digests_are_256 ?
+    BASE64_DIGEST256_LEN : BASE64_DIGEST_LEN;
   smartlist_t *fp_tmp = smartlist_create();
+
+  tor_assert(!(decode_hex && decode_base64));
   tor_assert(fp_out);
-  smartlist_split_string(fp_tmp, resource, "+", 0, 0);
+
+  smartlist_split_string(fp_tmp, resource, decode_base64?"-":"+", 0, 0);
   if (compressed_out)
     *compressed_out = 0;
   if (smartlist_len(fp_tmp)) {
@@ -3547,22 +3630,25 @@ dir_split_resource_into_fingerprints(const char *resource,
         *compressed_out = 1;
     }
   }
-  if (decode_hex) {
+  if (decode_hex || decode_base64) {
+    const size_t encoded_len = decode_hex ? hex_digest_len : base64_digest_len;
     int i;
     char *cp, *d = NULL;
     for (i = 0; i < smartlist_len(fp_tmp); ++i) {
       cp = smartlist_get(fp_tmp, i);
-      if (strlen(cp) != HEX_DIGEST_LEN) {
+      if (strlen(cp) != encoded_len) {
         log_info(LD_DIR,
                  "Skipping digest %s with non-standard length.", escaped(cp));
         smartlist_del_keeporder(fp_tmp, i--);
         goto again;
       }
-      d = tor_malloc_zero(DIGEST_LEN);
-      if (base16_decode(d, DIGEST_LEN, cp, HEX_DIGEST_LEN)<0) {
-        log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp));
-        smartlist_del_keeporder(fp_tmp, i--);
-        goto again;
+      d = tor_malloc_zero(digest_len);
+      if (decode_hex ?
+          (base16_decode(d, digest_len, cp, hex_digest_len)<0) :
+          (base64_decode(d, digest_len, cp, base64_digest_len)<0)) {
+          log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp));
+          smartlist_del_keeporder(fp_tmp, i--);
+          goto again;
       }
       smartlist_set(fp_tmp, i, d);
       d = NULL;
@@ -3572,26 +3658,18 @@ dir_split_resource_into_fingerprints(const char *resource,
     }
   }
   if (sort_uniq) {
-    smartlist_t *fp_tmp2 = smartlist_create();
-    int i;
-    if (decode_hex)
-      smartlist_sort_digests(fp_tmp);
-    else
+    if (decode_hex || decode_base64) {
+      if (digests_are_256) {
+        smartlist_sort_digests256(fp_tmp);
+        smartlist_uniq_digests256(fp_tmp);
+      } else {
+        smartlist_sort_digests(fp_tmp);
+        smartlist_uniq_digests(fp_tmp);
+      }
+    } else {
       smartlist_sort_strings(fp_tmp);
-    if (smartlist_len(fp_tmp))
-      smartlist_add(fp_tmp2, smartlist_get(fp_tmp, 0));
-    for (i = 1; i < smartlist_len(fp_tmp); ++i) {
-      char *cp = smartlist_get(fp_tmp, i);
-      char *last = smartlist_get(fp_tmp2, smartlist_len(fp_tmp2)-1);
-
-      if ((decode_hex && memcmp(cp, last, DIGEST_LEN))
-          || (!decode_hex && strcasecmp(cp, last)))
-        smartlist_add(fp_tmp2, cp);
-      else
-        tor_free(cp);
+      smartlist_uniq_strings(fp_tmp);
     }
-    smartlist_free(fp_tmp);
-    fp_tmp = fp_tmp2;
   }
   smartlist_add_all(fp_out, fp_tmp);
   smartlist_free(fp_tmp);
diff --git a/src/or/dirserv.c b/src/or/dirserv.c
index 251c35d..6ce6a46 100644
--- a/src/or/dirserv.c
+++ b/src/or/dirserv.c
@@ -2870,7 +2870,8 @@ dirserv_get_networkstatus_v2_fingerprints(smartlist_t *result,
       log_info(LD_DIRSERV,
                "Client requested 'all' network status objects; we have none.");
   } else if (!strcmpstart(key, "fp/")) {
-    dir_split_resource_into_fingerprints(key+3, result, NULL, 1, 1);
+    dir_split_resource_into_fingerprints(key+3, result, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
   }
 }
 
@@ -2935,10 +2936,12 @@ dirserv_get_routerdesc_fingerprints(smartlist_t *fps_out, const char *key,
   } else if (!strcmpstart(key, "d/")) {
     by_id = 0;
     key += strlen("d/");
-    dir_split_resource_into_fingerprints(key, fps_out, NULL, 1, 1);
+    dir_split_resource_into_fingerprints(key, fps_out, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
   } else if (!strcmpstart(key, "fp/")) {
     key += strlen("fp/");
-    dir_split_resource_into_fingerprints(key, fps_out, NULL, 1, 1);
+    dir_split_resource_into_fingerprints(key, fps_out, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
   } else {
     *msg = "Key not recognized";
     return -1;
@@ -3003,7 +3006,8 @@ dirserv_get_routerdescs(smartlist_t *descs_out, const char *key,
   } else if (!strcmpstart(key, "/tor/server/d/")) {
     smartlist_t *digests = smartlist_create();
     key += strlen("/tor/server/d/");
-    dir_split_resource_into_fingerprints(key, digests, NULL, 1, 1);
+    dir_split_resource_into_fingerprints(key, digests, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
     SMARTLIST_FOREACH(digests, const char *, d,
        {
          signed_descriptor_t *sd = router_get_by_descriptor_digest(d);
@@ -3016,7 +3020,8 @@ dirserv_get_routerdescs(smartlist_t *descs_out, const char *key,
     smartlist_t *digests = smartlist_create();
     time_t cutoff = time(NULL) - ROUTER_MAX_AGE_TO_PUBLISH;
     key += strlen("/tor/server/fp/");
-    dir_split_resource_into_fingerprints(key, digests, NULL, 1, 1);
+    dir_split_resource_into_fingerprints(key, digests, NULL,
+                                         DSR_HEX|DSR_SORT_UNIQ);
     SMARTLIST_FOREACH(digests, const char *, d,
        {
          if (router_digest_is_me(d)) {
@@ -3132,17 +3137,20 @@ dirserv_test_reachability(time_t now, int try_all)
     ctr = (ctr + 1) % 128;
 }
 
-/** Given a fingerprint <b>fp</b> which is either set if we're looking
- * for a v2 status, or zeroes if we're looking for a v3 status, return
- * a pointer to the appropriate cached dir object, or NULL if there isn't
- * one available. */
+/** Given a fingerprint <b>fp</b> which is either set if we're looking for a
+ * v2 status, or zeroes if we're looking for a v3 status, or a NUL-padded
+ * flavor name if we want a flavored v3 status, return a pointer to the
+ * appropriate cached dir object, or NULL if there isn't one available. */
 static cached_dir_t *
 lookup_cached_dir_by_fp(const char *fp)
 {
   cached_dir_t *d = NULL;
   if (tor_digest_is_zero(fp) && cached_consensuses)
     d = strmap_get(cached_consensuses, "ns");
-  else if (router_digest_is_me(fp) && the_v2_networkstatus)
+  else if (memchr(fp, '\0', DIGEST_LEN) && cached_consensuses &&
+           (d = strmap_get(cached_consensuses, fp))) {
+    /* this here interface is a nasty hack XXXX022 */;
+  } else if (router_digest_is_me(fp) && the_v2_networkstatus)
     d = the_v2_networkstatus;
   else if (cached_v2_networkstatus)
     d = digestmap_get(cached_v2_networkstatus, fp);
@@ -3228,6 +3236,18 @@ dirserv_have_any_serverdesc(smartlist_t *fps, int spool_src)
   return 0;
 }
 
+/** Return 1 if at least one of the 256-bit digests listed in <b>fps</b>
+ * identifies a microdescriptor in our cache; return 0 if none does. */
+int
+dirserv_have_any_microdesc(const smartlist_t *fps)
+{
+  microdesc_cache_t *md_cache = get_microdesc_cache();
+  SMARTLIST_FOREACH_BEGIN(fps, const char *, digest) {
+    if (microdesc_cache_lookup_by_digest256(md_cache, digest)) return 1;
+  } SMARTLIST_FOREACH_END(digest);
+  return 0;
+}
+
 /** Return an approximate estimate of the number of bytes that will
  * be needed to transmit the server descriptors (if is_serverdescs --
  * they can be either d/ or fp/ queries) or networkstatus objects (if
@@ -3259,6 +3279,17 @@ dirserv_estimate_data_size(smartlist_t *fps, int is_serverdescs,
   return result;
 }
 
+/** Estimate the bytes needed to send every microdescriptor named in
+ * <b>fps</b>: the list length times the cache's mean body size, halved
+ * when <b>compressed</b> is set. */
+size_t
+dirserv_estimate_microdesc_size(const smartlist_t *fps, int compressed)
+{
+  const size_t n_wanted = smartlist_len(fps);
+  const size_t raw_guess = n_wanted * microdesc_average_size(NULL);
+  return compressed ? raw_guess / 2 : raw_guess;
+}
+
 /** When we're spooling data onto our outbuf, add more whenever we dip
  * below this threshold. */
 #define DIRSERV_BUFFER_MIN 16384
@@ -3322,6 +3353,8 @@ connection_dirserv_add_servers_to_outbuf(dir_connection_t *conn)
 #endif
     body = signed_descriptor_get_body(sd);
     if (conn->zlib_state) {
+      /* XXXX022 This 'last' business should actually happen on the last
+       * routerinfo, not on the last fingerprint. */
       int last = ! smartlist_len(conn->fingerprint_stack);
       connection_write_to_buf_zlib(body, sd->signed_descriptor_len, conn,
                                    last);
@@ -3345,6 +3378,44 @@ connection_dirserv_add_servers_to_outbuf(dir_connection_t *conn)
   return 0;
 }
 
+/** Spooling helper: called when we're sending a bunch of microdescriptors,
+ * and the outbuf has become too empty.  Pops entries from fingerprint_stack
+ * and writes the microdescs we have for them onto outbuf.  Finishes the zlib
+ * state with the last microdesc we can serve, and sets the spool source to
+ * NONE once the stack is empty.  Always returns 0. */
+static int
+connection_dirserv_add_microdescs_to_outbuf(dir_connection_t *conn)
+{
+  microdesc_cache_t *cache = get_microdesc_cache();
+  while (smartlist_len(conn->fingerprint_stack) &&
+         buf_datalen(conn->_base.outbuf) < DIRSERV_BUFFER_MIN) {
+    char *fp256 = smartlist_pop_last(conn->fingerprint_stack);
+    microdesc_t *md = microdesc_cache_lookup_by_digest256(cache, fp256);
+    tor_free(fp256);
+    if (!md) /* We don't have this one: nothing to write for it. */
+      continue;
+    if (conn->zlib_state) {
+      /* Finish on the last servable microdesc; later misses write nothing. */
+      int last = !dirserv_have_any_microdesc(conn->fingerprint_stack);
+      connection_write_to_buf_zlib(md->body, md->bodylen, conn, last);
+      if (last) {
+        tor_zlib_free(conn->zlib_state);
+        conn->zlib_state = NULL;
+        SMARTLIST_FOREACH(conn->fingerprint_stack, char *, cp, tor_free(cp));
+        smartlist_clear(conn->fingerprint_stack); /* stream is closed */
+      }
+    } else {
+      connection_write_to_buf(md->body, md->bodylen, TO_CONN(conn));
+    }
+  }
+  if (!smartlist_len(conn->fingerprint_stack)) {
+    conn->dir_spool_src = DIR_SPOOL_NONE;
+    smartlist_free(conn->fingerprint_stack);
+    conn->fingerprint_stack = NULL;
+  }
+  return 0;
+}
+
 /** Spooling helper: Called when we're sending a directory or networkstatus,
  * and the outbuf has become too empty.  Pulls some bytes from
  * <b>conn</b>-\>cached_dir-\>dir_z, uncompresses them if appropriate, and
@@ -3452,6 +3523,8 @@ connection_dirserv_flushed_some(dir_connection_t *conn)
     case DIR_SPOOL_SERVER_BY_DIGEST:
     case DIR_SPOOL_SERVER_BY_FP:
       return connection_dirserv_add_servers_to_outbuf(conn);
+    case DIR_SPOOL_MICRODESC:
+      return connection_dirserv_add_microdescs_to_outbuf(conn);
     case DIR_SPOOL_CACHED_DIR:
       return connection_dirserv_add_dir_bytes_to_outbuf(conn);
     case DIR_SPOOL_NETWORKSTATUS:
diff --git a/src/or/microdesc.c b/src/or/microdesc.c
index 8507065..e10589f 100644
--- a/src/or/microdesc.c
+++ b/src/or/microdesc.c
@@ -20,6 +20,11 @@ struct microdesc_cache_t {
   tor_mmap_t *cache_content;
   /** Number of bytes used in the journal file. */
   size_t journal_len;
+
+  /** Total bytes of microdescriptor bodies we have added to this cache */
+  uint64_t total_len_seen;
+  /** Total number of microdescriptors we have added to this cache */
+  unsigned n_seen;
 };
 
 /** Helper: computes a hash of <b>md</b> to place it in a hash table. */
@@ -176,6 +181,8 @@ microdescs_add_list_to_cache(microdesc_cache_t *cache,
 
     HT_INSERT(microdesc_map, &cache->map, md);
     smartlist_add(added, md);
+    ++cache->n_seen;
+    cache->total_len_seen += md->bodylen;
   } SMARTLIST_FOREACH_END(md);
 
   if (f)
@@ -208,6 +215,8 @@ microdesc_cache_clear(microdesc_cache_t *cache)
     tor_munmap_file(cache->cache_content);
     cache->cache_content = NULL;
   }
+  cache->total_len_seen = 0;
+  cache->n_seen = 0;
 }
 
 /** Reload the contents of <b>cache</b> from disk.  If it is empty, load it
@@ -354,3 +363,29 @@ microdesc_free_all(void)
     tor_free(the_microdesc_cache);
   }
 }
+
+/** Look up the microdescriptor whose sha256 digest equals the DIGEST256_LEN
+ * bytes at <b>d</b>.  Searches <b>cache</b>, or the cache returned by
+ * get_microdesc_cache() when <b>cache</b> is NULL.  Returns the matching
+ * entry, or NULL if there is none. */
+microdesc_t *
+microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d)
+{
+  microdesc_t key;
+  microdesc_cache_t *target = cache ? cache : get_microdesc_cache();
+  memcpy(key.digest, d, DIGEST256_LEN);
+  return HT_FIND(microdesc_map, &target->map, &key);
+}
+
+/** Compute the mean body length of the microdescriptors added to <b>cache</b>
+ * (or to get_microdesc_cache()'s cache, if <b>cache</b> is NULL) since it was
+ * last cleared.  Yields 512 if none have been added. */
+size_t
+microdesc_average_size(microdesc_cache_t *cache)
+{
+  const microdesc_cache_t *c = cache ? cache : get_microdesc_cache();
+  if (c->n_seen == 0)
+    return 512;
+  return (size_t)(c->total_len_seen / c->n_seen);
+}
+
diff --git a/src/or/or.h b/src/or/or.h
index 21e90a4..0ec8029 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -1171,7 +1171,8 @@ typedef struct dir_connection_t {
   enum {
     DIR_SPOOL_NONE=0, DIR_SPOOL_SERVER_BY_DIGEST, DIR_SPOOL_SERVER_BY_FP,
     DIR_SPOOL_EXTRA_BY_DIGEST, DIR_SPOOL_EXTRA_BY_FP,
-    DIR_SPOOL_CACHED_DIR, DIR_SPOOL_NETWORKSTATUS
+    DIR_SPOOL_CACHED_DIR, DIR_SPOOL_NETWORKSTATUS,
+    DIR_SPOOL_MICRODESC, /* NOTE: if we add another entry, add another bit. */
   } dir_spool_src : 3;
   /** If we're fetching descriptors, what router purpose shall we assign
    * to them? */
@@ -3678,9 +3679,13 @@ void directory_initiate_command(const char *address, const tor_addr_t *addr,
                                 const char *payload, size_t payload_len,
                                 time_t if_modified_since);
 
+#define DSR_HEX       (1<<0)
+#define DSR_BASE64    (1<<1)
+#define DSR_DIGEST256 (1<<2)
+#define DSR_SORT_UNIQ (1<<3)
 int dir_split_resource_into_fingerprints(const char *resource,
-                                    smartlist_t *fp_out, int *compresseed_out,
-                                    int decode_hex, int sort_uniq);
+                                     smartlist_t *fp_out, int *compressed_out,
+                                     int flags);
 /** A pair of digests created by dir_split_resource_info_fingerprint_pairs() */
 typedef struct {
   char first[DIGEST_LEN];
@@ -3816,8 +3821,11 @@ int authdir_wants_to_reject_router(routerinfo_t *ri, const char **msg,
 int dirserv_would_reject_router(routerstatus_t *rs);
 int dirserv_remove_old_statuses(smartlist_t *fps, time_t cutoff);
 int dirserv_have_any_serverdesc(smartlist_t *fps, int spool_src);
+int dirserv_have_any_microdesc(const smartlist_t *fps);
 size_t dirserv_estimate_data_size(smartlist_t *fps, int is_serverdescs,
                                   int compressed);
+size_t dirserv_estimate_microdesc_size(const smartlist_t *fps, int compressed);
+
 typedef enum {
   NS_V2, NS_V3_CONSENSUS, NS_V3_VOTE, NS_CONTROL_PORT,
   NS_V3_CONSENSUS_MICRODESC
@@ -4146,6 +4154,11 @@ int microdesc_cache_rebuild(microdesc_cache_t *cache);
 int microdesc_cache_reload(microdesc_cache_t *cache);
 void microdesc_cache_clear(microdesc_cache_t *cache);
 
+microdesc_t *microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache,
+                                                 const char *d);
+
+size_t microdesc_average_size(microdesc_cache_t *cache);
+
 void microdesc_free(microdesc_t *md);
 void microdesc_free_all(void);
 
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 5ae40dd..d5e3b92 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -3832,7 +3832,7 @@ list_pending_downloads(digestmap_t *result,
       const char *resource = TO_DIR_CONN(conn)->requested_resource;
       if (!strcmpstart(resource, prefix))
         dir_split_resource_into_fingerprints(resource + p_len,
-                                             tmp, NULL, 1, 0);
+                                             tmp, NULL, DSR_HEX);
     }
   });
   SMARTLIST_FOREACH(tmp, char *, d,
diff --git a/src/test/test_dir.c b/src/test/test_dir.c
index 68dbbb4..ca55c11 100644
--- a/src/test/test_dir.c
+++ b/src/test/test_dir.c
@@ -361,9 +361,9 @@ test_dir_versions(void)
   ;
 }
 
-/** Run unit tests for misc directory functions. */
+/** Run unit tests for directory fp_pair functions. */
 static void
-test_dir_util(void)
+test_dir_fp_pairs(void)
 {
   smartlist_t *sl = smartlist_create();
   fp_pair_t *pair;
@@ -391,6 +391,127 @@ test_dir_util(void)
 }
 
 static void
+test_dir_split_fps(void *testdata)
+{
+  smartlist_t *sl = smartlist_create();
+  char *mem_op_hex_tmp = NULL; /* presumably used by test_mem_op_hex() */
+  (void)testdata;
+  /* Exercise dir_split_resource_into_fingerprints() with each DSR_ flag. */
+  /* Some example hex fingerprints and their base64 equivalents */
+#define HEX1 "Fe0daff89127389bc67558691231234551193EEE"
+#define HEX2 "Deadbeef99999991111119999911111111f00ba4"
+#define HEX3 "b33ff00db33ff00db33ff00db33ff00db33ff00d"
+#define HEX256_1 \
+    "f3f3f3f3fbbbbf3f3f3f3fbbbf3f3f3f3fbbbbf3f3f3f3fbbbf3f3f3f3fbbbbf"
+#define HEX256_2 \
+    "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccCCc"
+#define HEX256_3 \
+    "0123456789ABCdef0123456789ABCdef0123456789ABCdef0123456789ABCdef"
+#define B64_1 "/g2v+JEnOJvGdVhpEjEjRVEZPu4"
+#define B64_2 "3q2+75mZmZERERmZmRERERHwC6Q"
+#define B64_3 "sz/wDbM/8A2zP/ANsz/wDbM/8A0"
+#define B64_256_1 "8/Pz8/u7vz8/Pz+7vz8/Pz+7u/Pz8/P7u/Pz8/P7u78"
+#define B64_256_2 "zMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMw"
+#define B64_256_3 "ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8"
+
+  /* No flags: split on '+' only; pieces stay undecoded and in order. */
+  dir_split_resource_into_fingerprints("A+C+B", sl, NULL, 0);
+  tt_int_op(smartlist_len(sl), ==, 3);
+  tt_str_op(smartlist_get(sl, 0), ==, "A");
+  tt_str_op(smartlist_get(sl, 1), ==, "C");
+  tt_str_op(smartlist_get(sl, 2), ==, "B");
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_SORT_UNIQ on raw strings: sorted, with duplicates removed. */
+  dir_split_resource_into_fingerprints("A+C+B+A+B+B", sl, NULL, DSR_SORT_UNIQ);
+  tt_int_op(smartlist_len(sl), ==, 3);
+  tt_str_op(smartlist_get(sl, 0), ==, "A");
+  tt_str_op(smartlist_get(sl, 1), ==, "B");
+  tt_str_op(smartlist_get(sl, 2), ==, "C");
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_HEX: hex pieces are decoded to 160-bit digests, order kept. */
+  dir_split_resource_into_fingerprints(HEX1"+"HEX2, sl, NULL, DSR_HEX);
+  tt_int_op(smartlist_len(sl), ==, 2);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX1);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX2);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_HEX drops pieces whose length is wrong for a 160-bit digest. */
+  dir_split_resource_into_fingerprints(HEX1"+bogus+"HEX2"+"HEX256_1,
+                                       sl, NULL, DSR_HEX);
+  tt_int_op(smartlist_len(sl), ==, 2);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX1);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX2);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_HEX|DSR_DIGEST256 wants 256-bit hex, so 160-bit HEX2 is dropped. */
+  dir_split_resource_into_fingerprints(HEX256_1"+"HEX256_2"+"HEX2"+"HEX256_3,
+                                       sl, NULL, DSR_HEX|DSR_DIGEST256);
+  tt_int_op(smartlist_len(sl), ==, 3);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX256_1);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX256_2);
+  test_mem_op_hex(smartlist_get(sl, 2), ==, HEX256_3);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* Hex with DSR_SORT_UNIQ: ordered by decoded bytes, HEX2 only once. */
+  dir_split_resource_into_fingerprints(HEX1"+"HEX2"+"HEX3"+"HEX2,
+                                       sl, NULL, DSR_HEX|DSR_SORT_UNIQ);
+  tt_int_op(smartlist_len(sl), ==, 3);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX3);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX2);
+  test_mem_op_hex(smartlist_get(sl, 2), ==, HEX1);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* Same for 256-bit hex: ordered by decoded bytes, HEX256_1 only once. */
+  dir_split_resource_into_fingerprints(HEX256_1"+"HEX256_2"+"HEX256_3
+                                       "+"HEX256_1,
+                                       sl, NULL,
+                                       DSR_HEX|DSR_DIGEST256|DSR_SORT_UNIQ);
+  tt_int_op(smartlist_len(sl), ==, 3);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX256_3);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX256_2);
+  test_mem_op_hex(smartlist_get(sl, 2), ==, HEX256_1);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_BASE64: pieces are separated by '-' and decoded from base64. */
+  dir_split_resource_into_fingerprints(B64_1"-"B64_2, sl, NULL, DSR_BASE64);
+  tt_int_op(smartlist_len(sl), ==, 2);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX1);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX2);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+  /* DSR_BASE64|DSR_DIGEST256: the same, for 256-bit digests. */
+  dir_split_resource_into_fingerprints(B64_256_1"-"B64_256_2,
+                                       sl, NULL, DSR_BASE64|DSR_DIGEST256);
+  tt_int_op(smartlist_len(sl), ==, 2);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX256_1);
+  test_mem_op_hex(smartlist_get(sl, 1), ==, HEX256_2);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+  /* A single 256-bit base64 digest, with no separator at all. */
+  dir_split_resource_into_fingerprints(B64_256_1,
+                                       sl, NULL, DSR_BASE64|DSR_DIGEST256);
+  tt_int_op(smartlist_len(sl), ==, 1);
+  test_mem_op_hex(smartlist_get(sl, 0), ==, HEX256_1);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_clear(sl);
+
+ done:
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  smartlist_free(sl);
+  tor_free(mem_op_hex_tmp);
+}
+
+static void
 test_dir_measured_bw(void)
 {
   measured_bw_line_t mbwl;
@@ -1173,11 +1294,15 @@ test_dir_v3_networkstatus(void)
 #define DIR_LEGACY(name)                                                   \
   { #name, legacy_test_helper, 0, &legacy_setup, test_dir_ ## name }
 
+#define DIR(name)                               \
+  { #name, test_dir_##name, 0, NULL, NULL }
+
 struct testcase_t dir_tests[] = {
   DIR_LEGACY(nicknames),
   DIR_LEGACY(formats),
   DIR_LEGACY(versions),
-  DIR_LEGACY(util),
+  DIR_LEGACY(fp_pairs),
+  DIR(split_fps),
   DIR_LEGACY(measured_bw),
   DIR_LEGACY(param_voting),
   DIR_LEGACY(v3_networkstatus),
-- 
1.5.6.5