[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r16966: {tor} Add country-code support to configured node lists to impleme (in tor/trunk: . contrib doc src/or)



Author: nickm
Date: 2008-09-25 16:21:35 -0400 (Thu, 25 Sep 2008)
New Revision: 16966

Modified:
   tor/trunk/ChangeLog
   tor/trunk/contrib/checkSpace.pl
   tor/trunk/doc/tor.1.in
   tor/trunk/src/or/circuitbuild.c
   tor/trunk/src/or/config.c
   tor/trunk/src/or/geoip.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/router.c
   tor/trunk/src/or/routerlist.c
   tor/trunk/src/or/routerparse.c
Log:
Add country-code support to configured node lists to implement the ever-popular "no exits in Monaco" feature (ExcludeExitNodes {MC}).  Also allow country codes and IP ranges in ExitNodes.  (EntryNodes needs more work.) Based on code by Robert Hogan.  Needs more testing.

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/ChangeLog	2008-09-25 20:21:35 UTC (rev 16966)
@@ -11,6 +11,11 @@
       (i.e. new default value for HidServDirectoryV2 is 1). This is the
       last step in proposal 114, which aims to make hidden service
       connections more reliable.
+    - Allow node restrictions to include country codes.  The syntax
+      to exclude nodes in a country with country code XX is "ExcludeNodes
+      {XX}".  Patch from Robert Hogan.
+    - Allow ExitNodes list to include IP ranges and country codes, just like
+      the Exclude*Nodes lists.  Patch from Robert Hogan.
 
   o Major bugfixes:
     - Fix a bug when parsing ports in tor_addr_port_parse() that caused

Modified: tor/trunk/contrib/checkSpace.pl
===================================================================
--- tor/trunk/contrib/checkSpace.pl	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/contrib/checkSpace.pl	2008-09-25 20:21:35 UTC (rev 16966)
@@ -72,8 +72,8 @@
                 #    print "       //:$fn:$.\n";
                 s!//.*!!;
             }
-            ## Warn about braces preceded by non-space.
-            if (/([^\s])\{/) {
+            ## Warn about unquoted braces preceded by non-space.
+            if (/([^\s'])\{/) {
                 print "       $1\{:$fn:$.\n";
             }
             ## Warn about multiple internal spaces.

Modified: tor/trunk/doc/tor.1.in
===================================================================
--- tor/trunk/doc/tor.1.in	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/doc/tor.1.in	2008-09-25 20:21:35 UTC (rev 16966)
@@ -422,28 +422,28 @@
 .LP
 .TP
 \fBExcludeNodes \fR\fInode\fR,\fInode\fR,\fI...\fP
-A list of identity fingerprints, nicknames, and address patterns of
-nodes to never use when building a circuit.  (Example: ExcludeNodes
-SlowServer, $ABCDEFFFFFFFFFFFFFFF, 255.254.0.0/8)
+A list of identity fingerprints, nicknames, country codes and address patterns
+of nodes to never use when building a circuit.  (Example: ExcludeNodes
+SlowServer, $ABCDEFFFFFFFFFFFFFFF, {cc}, 255.254.0.0/8)
 .LP
 .TP
 \fBExcludeExitNodes \fR\fInode\fR,\fInode\fR,\fI...\fP
-A list of identity fingerprints, nicknames, and address patterns of
-nodes to never use when picking an exit node.  Note that any node
+A list of identity fingerprints, nicknames, country codes and address patterns
+of nodes to never use when picking an exit node.  Note that any node
 listed in ExcludeNodes is automatically considered to be part of this
 list.
 .LP
 .TP
 \fBEntryNodes \fR\fInode\fR,\fInode\fR,\fI...\fP
-A list of identity fingerprints or nicknames of preferred nodes to use for the
-first hop in the circuit.
+A list of identity fingerprints and nicknames of nodes to use for the first
+hop in the circuit.  (Country codes and address patterns are not yet accepted.)
 These are treated only as preferences unless StrictEntryNodes (see
 below) is also set.
 .LP
 .TP
 \fBExitNodes \fR\fInode\fR,\fInode\fR,\fI...\fP
-A list of identity fingerprints or nicknames of preferred nodes to use for the
-last hop in the circuit.
+A list of identity fingerprints, nicknames, country codes and address patterns
+of nodes to use for the last hop in the circuit.
 These are treated only as preferences unless StrictExitNodes (see
 below) is also set.
 .LP

Modified: tor/trunk/src/or/circuitbuild.c
===================================================================
--- tor/trunk/src/or/circuitbuild.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/circuitbuild.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -1192,7 +1192,6 @@
   smartlist_t *connections;
   int best_support = -1;
   int n_best_support=0;
-  smartlist_t *sl, *preferredexits;
   routerinfo_t *router;
   or_options_t *options = get_options();
 
@@ -1277,22 +1276,24 @@
            n_best_support, best_support >= 0 ? best_support : 0,
            n_pending_connections);
 
-  preferredexits = smartlist_create();
-  add_nickname_list_to_smartlist(preferredexits,options->ExitNodes,1);
-
-  sl = smartlist_create();
-
   /* If any routers definitely support any pending connections, choose one
    * at random. */
   if (best_support > 0) {
+    smartlist_t *supporting = smartlist_create(), *use = smartlist_create();
+
     for (i = 0; i < smartlist_len(dir->routers); i++)
       if (n_supported[i] == best_support)
-        smartlist_add(sl, smartlist_get(dir->routers, i));
+        smartlist_add(supporting, smartlist_get(dir->routers, i));
 
-    routerset_subtract_routers(sl,options->_ExcludeExitNodesUnion);
-    if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
-      smartlist_intersect(sl,preferredexits);
-    router = routerlist_sl_choose_by_bandwidth(sl, WEIGHT_FOR_EXIT);
+    routersets_get_disjunction(use, supporting, options->ExitNodes,
+                               options->_ExcludeExitNodesUnion, 1);
+    if (smartlist_len(use) == 0 && !options->StrictExitNodes) {
+      routersets_get_disjunction(use, supporting, NULL,
+                                 options->_ExcludeExitNodesUnion, 1);
+    }
+    router = routerlist_sl_choose_by_bandwidth(use, WEIGHT_FOR_EXIT);
+    smartlist_free(use);
+    smartlist_free(supporting);
   } else {
     /* Either there are no pending connections, or no routers even seem to
      * possibly support any of them.  Choose a router at random that satisfies
@@ -1300,6 +1301,7 @@
 
     int try;
     smartlist_t *needed_ports;
+    smartlist_t *supporting = smartlist_create(), *use = smartlist_create();
 
     if (best_support == -1) {
       if (need_uptime || need_capacity) {
@@ -1308,8 +1310,6 @@
                  "to list of all routers.",
                  need_capacity?", fast":"",
                  need_uptime?", stable":"");
-        smartlist_free(preferredexits);
-        smartlist_free(sl);
         tor_free(n_supported);
         return choose_good_exit_server_general(dir, 0, 0);
       }
@@ -1326,25 +1326,30 @@
             (try || router_handles_some_port(router, needed_ports))) {
 //          log_fn(LOG_DEBUG,"Try %d: '%s' is a possibility.",
 //                 try, router->nickname);
-          smartlist_add(sl, router);
+          smartlist_add(supporting, router);
         }
       }
 
-      routerset_subtract_routers(sl,options->_ExcludeExitNodesUnion);
-      if (options->StrictExitNodes || smartlist_overlap(sl,preferredexits))
-        smartlist_intersect(sl,preferredexits);
-        /* XXX sometimes the above results in null, when the requested
-         * exit node is down. we should pick it anyway. */
-      router = routerlist_sl_choose_by_bandwidth(sl, WEIGHT_FOR_EXIT);
+      routersets_get_disjunction(use, supporting, options->ExitNodes,
+                                 options->_ExcludeExitNodesUnion, 1);
+      if (smartlist_len(use) == 0 && !options->StrictExitNodes) {
+        routersets_get_disjunction(use, supporting, NULL,
+                                   options->_ExcludeExitNodesUnion, 1);
+      }
+      /* XXX sometimes the above results in null, when the requested
+       * exit node is down. we should pick it anyway. */
+      router = routerlist_sl_choose_by_bandwidth(use, WEIGHT_FOR_EXIT);
       if (router)
         break;
+      smartlist_clear(supporting);
+      smartlist_clear(use);
     }
     SMARTLIST_FOREACH(needed_ports, uint16_t *, cp, tor_free(cp));
     smartlist_free(needed_ports);
+    smartlist_free(use);
+    smartlist_free(supporting);
   }
 
-  smartlist_free(preferredexits);
-  smartlist_free(sl);
   tor_free(n_supported);
   if (router) {
     log_info(LD_CIRC, "Chose exit server '%s'", router->nickname);
@@ -1399,6 +1404,24 @@
   return NULL;
 }
 
+/** Warn if the requested <b>exit</b> is a router we know about that is
+ * excluded by ExcludeNodes or ExcludeExitNodes.  This only logs. */
+static void
+warn_if_router_excluded(const extend_info_t *exit)
+{
+  or_options_t *options = get_options();
+  routerinfo_t *ri = router_get_by_digest(exit->identity_digest);
+  /* Unknown router, or nothing is excluded: there is nothing to check. */
+  if (!ri || !options->_ExcludeExitNodesUnion)
+    return;
+
+  if (routerset_contains_router(options->_ExcludeExitNodesUnion, ri))
+    log_warn(LD_CIRC,"Requested exit node '%s' is in ExcludeNodes, "
+             "or ExcludeExitNodes, using anyway.",exit->nickname);
+
+  return;
+}
+
 /** Decide a suitable length for circ's cpath, and pick an exit
  * router (or use <b>exit</b> if provided). Store these in the
  * cpath. Return 0 if ok, -1 if circuit should be closed. */
@@ -1419,6 +1442,7 @@
   }
 
   if (exit) { /* the circuit-builder pre-requested one */
+    warn_if_router_excluded(exit);
     log_info(LD_CIRC,"Using requested exit node '%s'", exit->nickname);
     exit = extend_info_dup(exit);
   } else { /* we have to decide one */
@@ -1832,7 +1856,7 @@
   else if (options->UseBridges && ri->purpose != ROUTER_PURPOSE_BRIDGE)
     *reason = "not a bridge";
   else if (!options->UseBridges && !ri->is_possible_guard &&
-           !router_nickname_is_in_list(ri, options->EntryNodes))
+           !routerset_contains_router(options->EntryNodes,ri))
     *reason = "not recommended as a guard";
   else if (routerset_contains_router(options->ExcludeNodes, ri))
     *reason = "excluded";
@@ -1856,7 +1880,6 @@
     control_event_guard(e->nickname, e->identity, "GOOD");
     changed = 1;
   }
-
   return changed;
 }
 
@@ -2346,8 +2369,9 @@
     return;
   }
 
-  log_info(LD_CIRC,"Adding configured EntryNodes '%s'.",
-           options->EntryNodes);
+  if (options->EntryNodes)
+    log_info(LD_CIRC,"Adding configured EntryNodes '%s'.",
+             routerset_to_string(options->EntryNodes));
 
   entry_routers = smartlist_create();
   entry_fps = smartlist_create();
@@ -2355,7 +2379,11 @@
   old_entry_guards_not_on_list = smartlist_create();
 
   /* Split entry guards into those on the list and those not. */
-  add_nickname_list_to_smartlist(entry_routers, options->EntryNodes, 0);
+  /* XXXX021 Now that we allow countries and IP ranges in EntryNodes, this is
+   *  potentially an enormous list. For now, we disable such values for
+   *  EntryNodes in options_validate(); really, this wants a better solution.
+   */
+  routerset_get_all_routers(entry_routers, options->EntryNodes, 0);
   SMARTLIST_FOREACH(entry_routers, routerinfo_t *, ri,
                     smartlist_add(entry_fps,ri->cache_info.identity_digest));
   SMARTLIST_FOREACH(entry_guards, entry_guard_t *, e, {

Modified: tor/trunk/src/or/config.c
===================================================================
--- tor/trunk/src/or/config.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/config.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -194,11 +194,11 @@
   V(DNSListenAddress,            LINELIST, NULL),
   V(DownloadExtraInfo,           BOOL,     "0"),
   V(EnforceDistinctSubnets,      BOOL,     "1"),
-  V(EntryNodes,                  STRING,   NULL),
+  V(EntryNodes,                  ROUTERSET,   NULL),
   V(TestingEstimatedDescriptorPropagationTime, INTERVAL, "10 minutes"),
   V(ExcludeNodes,                ROUTERSET, NULL),
   V(ExcludeExitNodes,            ROUTERSET, NULL),
-  V(ExitNodes,                   STRING, NULL),
+  V(ExitNodes,                   ROUTERSET, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(FallbackNetworkstatusFile,   FILENAME,
@@ -817,13 +817,23 @@
   return _version;
 }
 
+/** Free <b>options</b>: release its _ExcludeExitNodesUnion routerset if one
+ * is set, then hand <b>options</b> to config_free() with options_format. */
+static void
+or_options_free(or_options_t *options)
+{
+  if (options->_ExcludeExitNodesUnion)
+    routerset_free(options->_ExcludeExitNodesUnion);
+  config_free(&options_format, options);
+}
+
 /** Release all memory and resources held by global configuration structures.
  */
 void
 config_free_all(void)
 {
   if (global_options) {
-    config_free(&options_format, global_options);
+    or_options_free(global_options);
     global_options = NULL;
   }
   if (global_state) {
@@ -1322,8 +1332,9 @@
   if (options->GeoIPFile &&
       ((!old_options || !opt_streq(old_options->GeoIPFile, options->GeoIPFile))
        || !geoip_is_loaded())) {
-    /* XXXX021 Don't use this "<default>" junk; make our filename options
+    /** XXXX021 Don't use this "<default>" junk; make our filename options
      * understand prefixes somehow. -NM */
+    /** XXXX021 Reload GeoIPFile on SIGHUP. -NM */
     char *actual_fname = tor_strdup(options->GeoIPFile);
 #ifdef WIN32
     if (!strcmp(actual_fname, "<default>")) {
@@ -1336,11 +1347,22 @@
 #endif
     geoip_load_file(actual_fname, options);
     tor_free(actual_fname);
+
+    /* XXXX Would iterating through all option_var's routersets be better? */
+    if (options->EntryNodes)
+      routerset_refresh_countries(options->EntryNodes);
+    if (options->ExitNodes)
+      routerset_refresh_countries(options->ExitNodes);
+    if (options->ExcludeNodes)
+      routerset_refresh_countries(options->ExcludeNodes);
+    if (options->ExcludeExitNodes)
+      routerset_refresh_countries(options->ExcludeExitNodes);
+    routerlist_refresh_countries();
   }
   /* Check if we need to parse and add the EntryNodes config option. */
   if (options->EntryNodes &&
       (!old_options ||
-       !opt_streq(old_options->EntryNodes, options->EntryNodes)))
+      (!routerset_equal(old_options->EntryNodes,options->EntryNodes))))
     entry_nodes_should_be_added();
 
   /* Since our options changed, we might need to regenerate and upload our
@@ -1701,7 +1723,6 @@
   case CONFIG_TYPE_LINELIST_S:
     config_line_append((config_line_t**)lvalue, c->key, c->value);
     break;
-
   case CONFIG_TYPE_OBSOLETE:
     log_warn(LD_CONFIG, "Skipping obsolete configuration option '%s'", c->key);
     break;
@@ -2964,19 +2985,24 @@
   }
 
   if (options->StrictExitNodes &&
-      (!options->ExitNodes || !strlen(options->ExitNodes)) &&
+      (!options->ExitNodes) &&
       (!old_options ||
        (old_options->StrictExitNodes != options->StrictExitNodes) ||
-       (!opt_streq(old_options->ExitNodes, options->ExitNodes))))
+       (!routerset_equal(old_options->ExitNodes,options->ExitNodes))))
     COMPLAIN("StrictExitNodes set, but no ExitNodes listed.");
 
   if (options->StrictEntryNodes &&
-      (!options->EntryNodes || !strlen(options->EntryNodes)) &&
+      (!options->EntryNodes) &&
       (!old_options ||
        (old_options->StrictEntryNodes != options->StrictEntryNodes) ||
-       (!opt_streq(old_options->EntryNodes, options->EntryNodes))))
+       (!routerset_equal(old_options->EntryNodes,options->EntryNodes))))
     COMPLAIN("StrictEntryNodes set, but no EntryNodes listed.");
 
+  if (options->EntryNodes && !routerset_is_list(options->EntryNodes)) {
+    /** XXXX021 fix this; see entry_guards_prepend_from_config(). */
+    REJECT("IPs or countries are not yet supported in EntryNodes.");
+  }
+
   if (options->AuthoritativeDir) {
     if (!options->ContactInfo)
       REJECT("Authoritative directory servers must set ContactInfo");
@@ -3334,10 +3360,6 @@
   if (options->UseEntryGuards && ! options->NumEntryGuards)
     REJECT("Cannot enable UseEntryGuards with NumEntryGuards set to 0");
 
-  if (check_nickname_list(options->ExitNodes, "ExitNodes", msg))
-    return -1;
-  if (check_nickname_list(options->EntryNodes, "EntryNodes", msg))
-    return -1;
   if (check_nickname_list(options->MyFamily, "MyFamily", msg))
     return -1;
   for (cl = options->NodeFamilies; cl; cl = cl->next) {

Modified: tor/trunk/src/or/geoip.c
===================================================================
--- tor/trunk/src/or/geoip.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/geoip.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -42,6 +42,23 @@
 /** A list of all known geoip_entry_t, sorted by ip_low. */
 static smartlist_t *geoip_entries = NULL;
 
+/** Return the index of the <b>country</b>'s entry in the GeoIP DB
+ * if it is a known 2-letter country code, otherwise return -1.
+ */
+country_t
+geoip_get_country(const char *country)
+{
+  void *_idxplus1;
+  intptr_t idx;
+
+  _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
+  if (!_idxplus1)
+    return -1;
+  /* The map stores index+1, so a NULL value can mean "no such country". */
+  idx = ((uintptr_t)_idxplus1)-1;
+  return (country_t)idx;
+}
+
 /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
  * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
  */
@@ -167,9 +184,15 @@
     log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.", filename);
     return -1;
   }
-  geoip_countries = smartlist_create();
+  if (!geoip_countries) {
+    geoip_countries = smartlist_create();
+    country_idxplus1_by_lc_code = strmap_new();
+  }
+  if (geoip_entries) {
+    SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, e, tor_free(e));
+    smartlist_free(geoip_entries);
+  }
   geoip_entries = smartlist_create();
-  country_idxplus1_by_lc_code = strmap_new();
   log_info(LD_GENERAL, "Parsing GEOIP file.");
   while (!feof(f)) {
     char buf[512];
@@ -210,7 +233,7 @@
 /** Return the two-letter country code associated with the number <b>num</b>,
  * or "??" for an unknown value. */
 const char *
-geoip_get_country_name(int num)
+geoip_get_country_name(country_t num)
 {
   if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
     geoip_country_t *c = smartlist_get(geoip_countries, num);

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/or.h	2008-09-25 20:21:35 UTC (rev 16966)
@@ -1321,6 +1321,9 @@
   unsigned int send_unencrypted : 1;
 } signed_descriptor_t;
 
+/** A signed integer representing a country code. */
+typedef int16_t country_t;
+
 /** Information about another onion router in the network. */
 typedef struct {
   signed_descriptor_t cache_info;
@@ -1394,7 +1397,8 @@
   time_t last_reachable;
   /** When did we start testing reachability for this OR? */
   time_t testing_since;
-
+  /** According to the geoip db what country is this router in? */
+  country_t country;
 } routerinfo_t;
 
 /** Information needed to keep and cache a signed extra-info document. */
@@ -2070,6 +2074,8 @@
   struct config_line_t *next;
 } config_line_t;
 
+typedef struct routerset_t routerset_t;
+
 /** Configuration options for a Tor process. */
 typedef struct {
   uint32_t _magic;
@@ -2090,17 +2096,22 @@
   char *Address; /**< OR only: configured address for this onion router. */
   char *PidFile; /**< Where to store PID of Tor process. */
 
-  char *ExitNodes; /**< Comma-separated list of nicknames of ORs to consider
-                    * as exits. */
-  char *EntryNodes; /**< Comma-separated list of nicknames of ORs to consider
-                     * as entry points. */
+  routerset_t *ExitNodes; /**< Structure containing nicknames, digests,
+                           * country codes and IP address patterns of ORs to
+                           * consider as exits. */
+  routerset_t *EntryNodes;/**< Structure containing nicknames, digests,
+                           * country codes and IP address patterns of ORs to
+                           * consider as entry points. */
   int StrictExitNodes; /**< Boolean: When none of our ExitNodes are up, do we
                         * stop building circuits? */
   int StrictEntryNodes; /**< Boolean: When none of our EntryNodes are up, do we
                          * stop building circuits? */
-  struct routerset_t *ExcludeNodes; /**< Comma-separated list of nicknames of
-                       * ORs not to use in circuits. */
-  struct routerset_t *ExcludeExitNodes; /**<DODOC */
+  routerset_t *ExcludeNodes;/**< Structure containing nicknames, digests,
+                             * country codes and IP address patterns of ORs
+                             * not to use in circuits. */
+  routerset_t *ExcludeExitNodes;/**< Structure containing nicknames, digests,
+                                 * country codes and IP address patterns of
+                                 * ORs not to consider as exits. */
 
   /** Union of ExcludeNodes and ExcludeExitNodes */
   struct routerset_t *_ExcludeExitNodesUnion;
@@ -3466,8 +3477,9 @@
 int geoip_load_file(const char *filename, or_options_t *options);
 int geoip_get_country_by_ip(uint32_t ipaddr);
 int geoip_get_n_countries(void);
-const char *geoip_get_country_name(int num);
+const char *geoip_get_country_name(country_t num);
 int geoip_is_loaded(void);
+country_t geoip_get_country(const char *countrycode);
 /** Indicates an action that we might be noting geoip statistics on.
  * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing
  * the others, we're not.
@@ -4277,22 +4289,28 @@
 const char *esc_router_info(routerinfo_t *router);
 void routers_sort_by_identity(smartlist_t *routers);
 
-typedef struct routerset_t routerset_t;
-
 routerset_t *routerset_new(void);
 int routerset_parse(routerset_t *target, const char *s,
                     const char *description);
 void routerset_union(routerset_t *target, const routerset_t *source);
+int routerset_is_list(const routerset_t *set);
 int routerset_contains_router(const routerset_t *set, routerinfo_t *ri);
 int routerset_contains_routerstatus(const routerset_t *set,
                                     routerstatus_t *rs);
 int routerset_contains_extendinfo(const routerset_t *set, extend_info_t *ei);
 void routerset_get_all_routers(smartlist_t *out, const routerset_t *routerset,
                                int running_only);
+void routersets_get_disjunction(smartlist_t *target, const smartlist_t *source,
+                                const routerset_t *include,
+                                const routerset_t *exclude, int running_only);
 void routerset_subtract_routers(smartlist_t *out,
                                 const routerset_t *routerset);
 char *routerset_to_string(const routerset_t *routerset);
+void routerset_refresh_countries(routerset_t *target);
+int routerset_equal(const routerset_t *old, const routerset_t *new);
 void routerset_free(routerset_t *routerset);
+void routerinfo_set_country(routerinfo_t *ri);
+void routerlist_refresh_countries(void);
 
 int hid_serv_get_responsible_directories(smartlist_t *responsible_dirs,
                                          const char *id);

Modified: tor/trunk/src/or/router.c
===================================================================
--- tor/trunk/src/or/router.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/router.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -1403,6 +1403,8 @@
   router_get_router_hash(ri->cache_info.signed_descriptor_body,
                          ri->cache_info.signed_descriptor_digest);
 
+  routerinfo_set_country(ri);
+
   tor_assert(! routerinfo_incompatible_with_extrainfo(ri, ei, NULL, NULL));
 
   if (desc_routerinfo)

Modified: tor/trunk/src/or/routerlist.c
===================================================================
--- tor/trunk/src/or/routerlist.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/routerlist.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -4707,6 +4707,14 @@
   /** An address policy for routers in the set.  For implementation reasons,
    * a router belongs to the set if it is _rejected_ by this policy. */
   smartlist_t *policies;
+
+  /** DOCDOC */
+  char *description;
+
+  /** DOCDOC */
+  smartlist_t *country_names;
+  int n_countries;
+  bitarray_t *countries;
 };
 
 /** Return a new empty routerset. */
@@ -4718,9 +4726,85 @@
   result->names = strmap_new();
   result->digests = digestmap_new();
   result->policies = smartlist_create();
+  result->country_names = smartlist_create();
   return result;
 }
 
+/** Return a new lowercased copy of XX if <b>c</b> starts "{XX}", else NULL. */
+static char *
+routerset_get_countryname(const char *c)
+{
+  char *country;
+  /* Only c[0] and c[3] are checked; text after the closing brace is allowed. */
+  if (strlen(c) < 4 || c[0] !='{' || c[3] !='}')
+    return NULL;
+
+  country = tor_strndup(c+1, 2);
+  tor_strlower(country);
+  return country;
+}
+
+#if 0
+/** Look up the two-character country code inside the "{XX}" entry <b>c</b>
+ * in the GeoIP database. Return the integer index if the country code is
+ * valid, -1 otherwise (also -1 when no GeoIP database is loaded).*/
+static int
+routerset_add_country(const char *c)
+{
+  char country[3];
+  country_t cc;
+
+  /* XXXX: Country codes must be of the form \{[a-z\?]{2}\} but this accepts
+     \{[.]{2}\}. Do we need to be strict? -RH */
+  /* Nope; if the country code is bad, we'll get -1 when we look it up. */
+
+  if (!geoip_is_loaded()) {
+    log(LOG_WARN, LD_CONFIG, "GeoIP database not loaded: Cannot add country"
+                             "entry %s, ignoring.", c);
+    return -1;
+  }
+
+  memcpy(country, c+1, 2);
+  country[2] = '\0';
+  tor_strlower(country);
+
+  if ((cc=geoip_get_country(country))==-1) {
+    log(LOG_WARN, LD_CONFIG, "Country code '%s' is not valid, ignoring.",
+        country);
+  }
+  return cc;
+}
+#endif
+
+/** Rebuild <b>target</b>'s <b>countries</b> bitarray from its country_names.
+ * Called whenever the GeoIP database is reloaded.  If no database is loaded,
+ * the bitarray is dropped; unrecognized codes are logged and skipped. */
+void
+routerset_refresh_countries(routerset_t *target)
+{
+  int cc;
+  if (target->countries) {
+    bitarray_free(target->countries);
+  }
+  if (!geoip_is_loaded()) {
+    target->countries = NULL;
+    target->n_countries = 0;
+    return;
+  }
+  target->n_countries = geoip_get_n_countries();
+  target->countries = bitarray_init_zero(target->n_countries);
+  SMARTLIST_FOREACH_BEGIN(target->country_names, const char *, country) {
+    cc = geoip_get_country(country);
+    if (cc >= 0) {
+      tor_assert(cc < target->n_countries);
+      bitarray_set(target->countries, cc);
+    } else {
+      log(LOG_WARN, LD_CONFIG, "Country code '%s' is not recognized.",
+          country);
+    }
+  } SMARTLIST_FOREACH_END(country);
+}
+
 /** Parse the string <b>s</b> to create a set of routerset entries, and add
  * them to <b>target</b>.  In log messages, refer to the string as
  * <b>description</b>.  Return 0 on success, -1 on failure.
@@ -4733,10 +4817,12 @@
 routerset_parse(routerset_t *target, const char *s, const char *description)
 {
   int r = 0;
+  int added_countries = 0;
+  char *countryname;
   smartlist_t *list = smartlist_create();
   smartlist_split_string(list, s, ",",
                          SPLIT_SKIP_SPACE | SPLIT_IGNORE_BLANK, 0);
-  SMARTLIST_FOREACH(list, char *, nick, {
+  SMARTLIST_FOREACH_BEGIN(list, char *, nick) {
       addr_policy_t *p;
       if (is_legal_hexdigest(nick)) {
         char d[DIGEST_LEN];
@@ -4748,21 +4834,28 @@
       } else if (is_legal_nickname(nick)) {
         log_debug(LD_CONFIG, "Adding nickname %s to %s", nick, description);
         strmap_set_lc(target->names, nick, (void*)1);
+      } else if ((countryname = routerset_get_countryname(nick)) != NULL) {
+        log_debug(LD_CONFIG, "Adding country %s to %s", nick,
+                  description);
+        smartlist_add(target->country_names, countryname);
+        added_countries = 1;
       } else if ((strchr(nick,'.') || strchr(nick, '*')) &&
                  (p = router_parse_addr_policy_item_from_string(
                                      nick, ADDR_POLICY_REJECT))) {
         log_debug(LD_CONFIG, "Adding address %s to %s", nick, description);
         smartlist_add(target->policies, p);
       } else {
-        log_warn(LD_CONFIG, "Nickname '%s' in %s is misformed.", nick,
+        log_warn(LD_CONFIG, "Entry '%s' in %s is misformed.", nick,
                  description);
         r = -1;
         tor_free(nick);
         SMARTLIST_DEL_CURRENT(list, nick);
       }
-    });
+  } SMARTLIST_FOREACH_END(nick);
   smartlist_add_all(target->list, list);
   smartlist_free(list);
+  if (added_countries)
+    routerset_refresh_countries(target);
   return r;
 }
 
@@ -4779,22 +4872,48 @@
   tor_free(s);
 }
 
+/** Return true iff <b>set</b> lists only nicknames and digests, and includes
+ * no IP ranges or countries.  <b>set</b> must not be NULL. */
+int
+routerset_is_list(const routerset_t *set)
+{
+  return smartlist_len(set->country_names) == 0 &&
+    smartlist_len(set->policies) == 0;
+}
+
+/** Return true iff <b>set</b> is NULL or its entry list has no elements. */
+static int
+routerset_is_empty(const routerset_t *set)
+{
+  return !set || smartlist_len(set->list) == 0;
+}
+
 /** Helper.  Return true iff <b>set</b> contains a router based on the other
- * provided fields. */
+ * provided fields.  Return higher values for more specific subentries.
+ (If country is -1, then we take the country from addr.) */
 static int
 routerset_contains(const routerset_t *set, const tor_addr_t *addr,
                    uint16_t orport,
-                   const char *nickname, const char *id_digest, int is_named)
+                   const char *nickname, const char *id_digest, int is_named,
+                   country_t country)
 {
   if (!set || !set->list) return 0;
   (void) is_named; /* not supported */
   if (nickname && strmap_get_lc(set->names, nickname))
-    return 1;
+    return 4;
   if (id_digest && digestmap_get(set->digests, id_digest))
-    return 1;
+    return 4;
   if (addr && compare_tor_addr_to_addr_policy(addr, orport, set->policies)
       == ADDR_POLICY_REJECTED)
-    return 1;
+    return 3;
+  if (set->countries) {
+    if (country < 0 && addr)
+      country = geoip_get_country_by_ip(tor_addr_to_ipv4h(addr));
+
+    if (country >= 0 && country < set->n_countries &&
+        bitarray_is_set(set->countries, country))
+      return 2;
+  }
   return 0;
 }
 
@@ -4807,7 +4926,8 @@
                             ei->port,
                             ei->nickname,
                             ei->identity_digest,
-                            -1);
+                            -1, /*is_named*/
+                            -1 /*country*/);
 }
 
 /** Return true iff <b>ri</b> is in <b>set</b>. */
@@ -4821,7 +4941,8 @@
                             ri->or_port,
                             ri->nickname,
                             ri->cache_info.identity_digest,
-                            ri->is_named);
+                            ri->is_named,
+                            ri->country);
 }
 
 /** Return true iff <b>rs</b> is in <b>set</b>. */
@@ -4835,7 +4956,8 @@
                             rs->or_port,
                             rs->nickname,
                             rs->identity_digest,
-                            rs->is_named);
+                            rs->is_named,
+                            -1);
 }
 
 /** Add every known routerinfo_t that is a member of <b>routerset</b> to
@@ -4849,24 +4971,56 @@
     return;
   if (!warned_nicknames)
     warned_nicknames = smartlist_create();
-  SMARTLIST_FOREACH(routerset->list, const char *, name, {
-    routerinfo_t *router = router_get_by_nickname(name, 1);
-    if (router) {
-      if (!running_only || router->is_running)
-        smartlist_add(out, router);
-    }
-  });
-  if (smartlist_len(routerset->policies)) {
+  if (routerset_is_list(routerset)) {
+
+    /* No routers are specified by type; all are given by name or digest.
+     * We can do a lookup in O(len(list)). */
+    SMARTLIST_FOREACH(routerset->list, const char *, name, {
+        routerinfo_t *router = router_get_by_nickname(name, 1);
+        if (router) {
+          if (!running_only || router->is_running)
+            smartlist_add(out, router);
+        }
+    });
+  } else {
+    /* We need to iterate over the routerlist to get all the ones of the
+     * right kind. */
     routerlist_t *rl = router_get_routerlist();
-    SMARTLIST_FOREACH(rl->routers, routerinfo_t *, router,
-      if (compare_addr_to_addr_policy(router->addr, router->or_port,
-               routerset->policies) == ADDR_POLICY_REJECT) {
-        if (!running_only || router->is_running)
+    SMARTLIST_FOREACH(rl->routers, routerinfo_t *, router, {
+        if (running_only && !router->is_running)
+          continue;
+        if (routerset_contains_router(routerset, router))
           smartlist_add(out, router);
-      });
+    });
   }
 }
 
+/** Add to <b>target</b> each router in <b>source</b> that <b>include</b>
+ * contains (or any router, if <b>include</b> is empty) unless <b>exclude</b>
+ * matches it more specifically. Skip non-running ones if running_only. */
+void
+routersets_get_disjunction(smartlist_t *target,
+                           const smartlist_t *source,
+                           const routerset_t *include,
+                           const routerset_t *exclude, int running_only)
+{
+  SMARTLIST_FOREACH(source, routerinfo_t *, router, {
+    int include_result;
+    if (running_only && !router->is_running)
+      continue;
+    if (!routerset_is_empty(include))
+      include_result = routerset_contains_router(include, router);
+    else
+      include_result = 1;
+
+    if (include_result) {
+      int exclude_result = routerset_contains_router(exclude, router);
+      if (include_result >= exclude_result)
+        smartlist_add(target, router);
+    }
+  });
+}
+
 /** Remove every routerinfo_t from <b>lst</b> that is in <b>routerset</b>. */
 void
 routerset_subtract_routers(smartlist_t *lst, const routerset_t *routerset)
@@ -4892,6 +5046,46 @@
   return smartlist_join_strings(set->list, ",", 0, NULL);
 }
 
+/** Helper: return true iff <b>old</b> and <b>new</b> list the same entry
+ * strings in the same order. Neither set may be NULL. */
+int
+routerset_equal(const routerset_t *old, const routerset_t *new)
+{
+  if (smartlist_len(old->list) != smartlist_len(new->list))
+    return 0;
+
+  SMARTLIST_FOREACH(old->list, const char *, cp1, {
+    const char *cp2 = smartlist_get(new->list, cp1_sl_idx);
+    if (strcmp(cp1, cp2))
+      return 0;
+  });
+
+  return 1;
+
+#if 0
+  /* XXXX: This won't work if the names/digests are identical but in a
+     different order. Checking for exact equality would be heavy going,
+     is it worth it? -RH*/
+  /* This code is totally bogus; sizeof doesn't work even remotely like this
+   * code seems to think.  Let's revert to a string-based comparison for
+   * now. -NM*/
+  if (sizeof(old->names) != sizeof(new->names))
+    return 0;
+
+  if (memcmp(old->names,new->names,sizeof(new->names)))
+    return 0;
+  if (sizeof(old->digests) != sizeof(new->digests))
+    return 0;
+  if (memcmp(old->digests,new->digests,sizeof(new->digests)))
+    return 0;
+  if (sizeof(old->countries) != sizeof(new->countries))
+    return 0;
+  if (memcmp(old->countries,new->countries,sizeof(new->countries)))
+    return 0;
+  return 1;
+#endif
+}
+
 /** Free all storage held in <b>routerset</b>. */
 void
 routerset_free(routerset_t *routerset)
@@ -4901,13 +5095,32 @@
   SMARTLIST_FOREACH(routerset->policies, addr_policy_t *, p,
                     addr_policy_free(p));
   smartlist_free(routerset->policies);
+  SMARTLIST_FOREACH(routerset->country_names, char *, cp, tor_free(cp));
+  smartlist_free(routerset->country_names);
 
   strmap_free(routerset->names, NULL);
   digestmap_free(routerset->digests, NULL);
-
+  if (routerset->countries)
+    bitarray_free(routerset->countries);
   tor_free(routerset);
 }
 
+/** Set <b>ri</b>->country from a GeoIP lookup on <b>ri</b>->addr. */
+void
+routerinfo_set_country(routerinfo_t *ri)
+{
+  ri->country = geoip_get_country_by_ip(ri->addr);
+}
+
+/** Recompute the country of every router in the current routerlist. */
+void
+routerlist_refresh_countries(void)
+{
+  routerlist_t *rl = router_get_routerlist();
+  SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri,
+                    routerinfo_set_country(ri));
+}
+
 /** Determine the routers that are responsible for <b>id</b> (binary) and
  * add pointers to those routers' routerstatus_t to <b>responsible_dirs</b>.
  * Return -1 if we're returning an empty smartlist, else return 0.

Modified: tor/trunk/src/or/routerparse.c
===================================================================
--- tor/trunk/src/or/routerparse.c	2008-09-25 17:37:00 UTC (rev 16965)
+++ tor/trunk/src/or/routerparse.c	2008-09-25 20:21:35 UTC (rev 16966)
@@ -1184,6 +1184,7 @@
   tor_assert(tok->n_args >= 5);
 
   router = tor_malloc_zero(sizeof(routerinfo_t));
+  router->country = -1;
   router->cache_info.routerlist_index = -1;
   router->cache_info.annotations_len = s-start_of_annotations + prepend_len;
   router->cache_info.signed_descriptor_len = end-s;
@@ -1388,6 +1389,8 @@
                             "router descriptor") < 0)
     goto err;
 
+  routerinfo_set_country(router);
+
   if (!router->or_port) {
     log_warn(LD_DIR,"or_port unreadable or 0. Failing.");
     goto err;