[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r15294: Implement "new" Tor guard+exit bw-weighted selection algorit (torflow/branches/gsoc2008/TorCtl)



Author: mikeperry
Date: 2008-06-15 21:42:10 -0400 (Sun, 15 Jun 2008)
New Revision: 15294

Modified:
   torflow/branches/gsoc2008/TorCtl/PathSupport.py
Log:

Implement "new" Tor guard+exit bw-weighted selection
algorithm. Also add unit tests used for verification. Seems
to be performing sensibly.



Modified: torflow/branches/gsoc2008/TorCtl/PathSupport.py
===================================================================
--- torflow/branches/gsoc2008/TorCtl/PathSupport.py	2008-06-16 01:14:02 UTC (rev 15293)
+++ torflow/branches/gsoc2008/TorCtl/PathSupport.py	2008-06-16 01:42:10 UTC (rev 15294)
@@ -47,6 +47,7 @@
 import copy
 import Queue
 import time
+import TorUtil
 from TorUtil import *
 
 __all__ = ["NodeRestrictionList", "PathRestrictionList",
@@ -142,6 +143,9 @@
 
   def rewind(self):
     "Rewind the generator to the 'beginning'"
+    # FIXME: If we apply the restrictions now, we can save cycles
+    # during selection, and also some memory overhead (at the cost
+    # of a much slower rewind() though..)
     self.routers = copy.copy(self.sorted_r)
 
   def mark_chosen(self, r):
@@ -532,77 +536,118 @@
         break
 
 class BwWeightedGenerator(NodeGenerator):
-  def __init__(self, sorted_r, rstr_list, pathlen, exit=False):
+  """
+  This is a generator designed to match the Tor Path Selection algorithm. It
+  will generate nodes weighted by their bandwidth, but take the appropriate
+  weighting into account against guard nodes and exit nodes when they are 
+  chosen for positions other than guard/exit. For background see: 
+  routerlist.c::smartlist_choose_by_bandwidth(),
+  http://archives.seul.org/or/dev/Jul-2007/msg00021.html,
+  http://archives.seul.org/or/dev/Jul-2007/msg00056.html, and
+  https://tor-svn.freehaven.net/svn/tor/trunk/doc/spec/path-spec.txt
+  The formulas used are from the first or-dev link, but are proven equivalent 
+  to the ones now used in routerlist.c in the second or-dev link
+  """ 
+  def __init__(self, sorted_r, rstr_list, pathlen, exit=False, guard=False):
     """ Pass exit=True to create a generator for exit-nodes """
+    self.max_bandwidth = 10000000
     # Out for an exit-node?
     self.exit = exit
+    # Is this a guard node? 
+    self.guard = guard
     # Different sums of bandwidths
     self.total_bw = 0
     self.total_exit_bw = 0
-    self.exit_bw_to_dest = 0
+    self.total_guard_bw = 0
+    self.total_weighted_bw = 0
     self.pathlen = pathlen
     NodeGenerator.__init__(self, sorted_r, rstr_list)
 
   def rewind(self):
     NodeGenerator.rewind(self)
     # Set the exit_weight
+    # We are choosing a non-exit
+    self.total_exit_bw = 0
+    self.total_guard_bw = 0
+    self.total_bw = 0
+    for r in self.sorted_r:
+      # Should this be outside the restriction checks?
+      # TODO: Check max_bandwidth and cap...
+      if self.rstr_list.r_is_ok(r):
+        self.total_bw += r.bw
+        if "Exit" in r.flags:
+          self.total_exit_bw += r.bw
+        if "Guard" in r.flags:
+          self.total_guard_bw += r.bw
+
+    bw_per_hop = (1.0*self.total_bw)/self.pathlen
+
+    # Print some debugging info about bandwidth ratios
+    if self.total_bw > 0:
+      e_ratio = self.total_exit_bw/float(self.total_bw)
+      g_ratio = self.total_guard_bw/float(self.total_bw)
+    else:
+      g_ratio = 0
+      e_ratio = 0
+    plog("DEBUG",
+       "E = " + str(self.total_exit_bw) +
+       ", G = " + str(self.total_guard_bw) +
+       ", T = " + str(self.total_bw) +
+       ", g_ratio = " + str(g_ratio) + ", e_ratio = " +str(e_ratio) +
+       ", bw_per_hop = " + str(bw_per_hop))
+   
     if self.exit:
-      self.exit_bw_to_dest = 0
-      # We are choosing an exit-node
-      for r in self.sorted_r:
-        if self.rstr_list.r_is_ok(r):
-          self.exit_bw_to_dest += r.bw
-      plog("DEBUG", "Exit-bandwidth to this destination is " +
-         str(self.exit_bw_to_dest))
-      self.weight = 1.0
+      self.exit_weight = 1.0
     else:
-      # We are choosing a non-exit
-      self.total_exit_bw = 0
-      self.total_bw = 0
-      for r in self.sorted_r:
-        # Should this be outside the restriction checks?
-        if self.rstr_list.r_is_ok(r):
-          self.total_bw += r.bw
-          if "Exit" in r.flags:
-            self.total_exit_bw += r.bw
-
-      bw_per_hop = (1.0*self.total_bw)/self.pathlen
-      if self.total_bw > 0:
-        ratio = self.total_exit_bw/float(self.total_bw)
-      else: ratio = 0
-      plog("DEBUG", "E = " + str(self.total_exit_bw) +
-         ", T = " + str(self.total_bw) +
-         ", ratio = " + str(ratio) +
-         ", bw_per_hop = " + str(bw_per_hop))
-      
       if self.total_exit_bw < bw_per_hop:
         # Don't use exit nodes at all
-        # add a ConserveExitsRestriction?
-        self.weight = 0
+        self.exit_weight = 0
       else:
         if self.total_exit_bw > 0:
-          self.weight = ((self.total_exit_bw-bw_per_hop)/self.total_exit_bw)
-        else: self.weight = 0
-    plog("DEBUG", "The exit-weight is: " + str(self.weight))
+          self.exit_weight = ((self.total_exit_bw-bw_per_hop)/self.total_exit_bw)
+        else: self.exit_weight = 0
 
+    if self.guard:
+      self.guard_weight = 1.0
+    else:
+      if self.total_guard_bw < bw_per_hop:
+        # Don't use exit nodes at all
+        self.guard_weight = 0
+      else:
+        if self.total_guard_bw > 0:
+          self.guard_weight = ((self.total_guard_bw-bw_per_hop)/self.total_guard_bw)
+        else: self.guard_weight = 0
+    
+    for r in self.sorted_r:
+      bw = r.bw
+      if "Exit" in r.flags:
+        bw *= self.exit_weight
+      if "Guard" in r.flags:
+        bw *= self.guard_weight
+      self.total_weighted_bw += bw
+
+    self.total_weighted_bw = int(self.total_weighted_bw)
+    plog("DEBUG", "Bw: "+str(self.total_weighted_bw)+"/"+str(self.total_bw)
+          +". The exit-weight is: "+str(self.exit_weight)
+          + ", guard weight is: "+str(self.guard_weight))
+
   def next_r(self):
     while True:
       # Choose a suitable random int
-      if self.exit:
-        i = random.randint(0, self.exit_bw_to_dest)
-      else:
-        i = random.randint(0,
-                     (self.total_bw-self.total_exit_bw)    # nonexit
-                     +int(self.total_exit_bw*self.weight)) # + weighted exit
+      i = random.randint(0, self.total_weighted_bw)
+
       # Go through the routers
       for r in self.routers:
-        # Below zero here --> choose a new random int+router
+        # Below zero here means next() -> choose a new random int+router 
         if i < 0: break
         if self.rstr_list.r_is_ok(r):
-          # Only weight exit nodes
+          bw = r.bw
           if "Exit" in r.flags:
-            i -= self.weight * r.bw
-          else: i -= r.bw
+            bw *= self.exit_weight
+          if "Guard" in r.flags:
+            bw *= self.guard_weight
+
+          i -= bw
           if i < 0:
             plog("DEBUG", "Chosen router with a bandwidth of: " + str(r.bw))
             yield r
@@ -734,7 +779,7 @@
       self.exit_rstr.add_restriction(CountryCodeRestriction())
       
       # Specified countries for different positions
-      if self.geoip_config.entry_country:  
+      if self.geoip_config.entry_country:
         entry_rstr.add_restriction(CountryRestriction(self.geoip_config.entry_country))
       if self.geoip_config.middle_country:
         mid_rstr.add_restriction(CountryRestriction(self.geoip_config.middle_country))
@@ -747,7 +792,7 @@
         if len(self.geoip_config.excludes) > 0:
           entry_rstr.add_restriction(ExcludeCountriesRestriction(self.geoip_config.excludes))
           mid_rstr.add_restriction(ExcludeCountriesRestriction(self.geoip_config.excludes))
-          self.exit_rstr.add_restriction(ExcludeCountriesRestriction(self.geoip_config.excludes))      
+          self.exit_rstr.add_restriction(ExcludeCountriesRestriction(self.geoip_config.excludes))
       
       # Unique countries set? None --> pass
       if self.geoip_config.unique_countries != None:
@@ -776,7 +821,7 @@
           OrderedExitGenerator(80, sorted_r, self.exit_rstr)
     elif self.uniform:
       # 'real' exits should also be chosen when not using 'order_exits'
-      self.exit_rstr.add_restriction(ExitPolicyRestriction("255.255.255.255", 80)) 
+      self.exit_rstr.add_restriction(ExitPolicyRestriction("255.255.255.255", 80))
       exitgen = UniformGenerator(sorted_r, self.exit_rstr)
     else:
       self.exit_rstr.add_restriction(ExitPolicyRestriction("255.255.255.255", 80))
@@ -792,7 +837,8 @@
       entry_rstr.del_restriction(ConserveExitsRestriction)
       mid_rstr.del_restriction(ConserveExitsRestriction)
       self.path_selector = PathSelector(
-         BwWeightedGenerator(sorted_r, entry_rstr, self.pathlen),
+         BwWeightedGenerator(sorted_r, entry_rstr, self.pathlen,
+                             guard=self.use_guards),
          BwWeightedGenerator(sorted_r, mid_rstr, self.pathlen),
          exitgen, self.path_rstr)
       return
@@ -1487,6 +1533,49 @@
 
 ########################## Unit tests ##########################
 
+def do_gen_unit(gen, r_list, weight_bw, num_print):
+  trials = 0
+  for r in r_list:
+    if gen.rstr_list.r_is_ok(r):
+      trials += weight_bw(gen, r)
+  trials = int(trials/1024)
+  
+  print "Running "+str(trials)+" trials"
+
+  # 0. Reset r.chosen = 0 for all routers
+  for r in r_list:
+    r.chosen = 0
+
+  # 1. Generate 'trials' choices:
+  #    1a. r.chosen++
+
+  loglevel = TorUtil.loglevel
+  TorUtil.loglevel = "INFO"
+
+  #gen.rewind() - Just overhead if we create a fresh generator each time
+  rtrs = gen.next_r()
+  for i in xrange(1, trials):
+    r = rtrs.next()
+    r.chosen += 1
+
+  TorUtil.loglevel = loglevel
+
+  # 2. Print top num_print routers choices+bandwidth stats+flags
+  i = 0
+  copy_rlist = copy.copy(r_list)
+  copy_rlist.sort(lambda x, y: cmp(y.chosen, x.chosen))
+  for r in copy_rlist:
+    if not gen.rstr_list.r_is_ok(r): continue
+    flag = ""
+    bw = int(weight_bw(gen, r))
+    if "Exit" in r.flags:
+      flag += "E"
+    if "Guard" in r.flags:
+      flag += "G"
+    print str(r.list_rank)+". "+r.nickname+" "+str(r.bw/1024)+"/"+str(bw/1024)+": "+str(r.chosen)+", "+flag
+    i += 1
+    if i > num_print: break
+
 def do_unit(rst, r_list, plamb):
   print "\n"
   print "-----------------------------------"
@@ -1516,7 +1605,7 @@
 
 if __name__ == '__main__':
   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-  s.connect(("127.0.0.1",9061))
+  s.connect(("127.0.0.1",9051))
   c = Connection(s)
   c.debug(file("control.log", "w"))
   c.authenticate()
@@ -1525,14 +1614,35 @@
 
   sorted_rlist.sort(lambda x, y: cmp(y.bw, x.bw))
   for i in xrange(len(sorted_rlist)): sorted_rlist[i].list_rank = i
+
+  def flag_weighting(bwgen, r):
+    bw = r.bw
+    if "Exit" in r.flags:
+      bw *= bwgen.exit_weight
+    if "Guard" in r.flags:
+      bw *= bwgen.guard_weight
+    return bw
+
+  do_gen_unit(BwWeightedGenerator(sorted_rlist, FlagsRestriction(["Exit"]),
+                                  3, exit=True),
+              sorted_rlist, flag_weighting, 500)
   
+  do_gen_unit(BwWeightedGenerator(sorted_rlist, FlagsRestriction(["Guard"]),
+              3, guard=True),
+              sorted_rlist, flag_weighting, 500)
+  
+  do_gen_unit(
+   BwWeightedGenerator(sorted_rlist, FlagsRestriction(["Valid"]), 3),
+   sorted_rlist, flag_weighting, 500)
+  
+  exit(0)
+ 
   for r in sorted_rlist:
     if r.will_exit_to("211.11.21.22", 465):
       print r.nickname+" "+str(r.bw)
 
   do_unit(FlagsRestriction(["Guard"], []), sorted_rlist, lambda r: " ".join(r.flags))
   do_unit(FlagsRestriction(["Fast"], []), sorted_rlist, lambda r: " ".join(r.flags))
-  exit(0)
 
   do_unit(ExitPolicyRestriction("2.11.2.2", 80), sorted_rlist,
           lambda r: "exits to 80")