[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r9806: Stats now written out. Also added more verification checks. (in torflow/trunk: . TorCtl)



Author: mikeperry
Date: 2007-03-12 17:07:19 -0400 (Mon, 12 Mar 2007)
New Revision: 9806

Modified:
   torflow/trunk/TorCtl/PathSupport.py
   torflow/trunk/TorCtl/TorCtl.py
   torflow/trunk/metatroller.py
Log:
Stats now written out. Also added more verification checks.



Modified: torflow/trunk/TorCtl/PathSupport.py
===================================================================
--- torflow/trunk/TorCtl/PathSupport.py	2007-03-12 13:04:20 UTC (rev 9805)
+++ torflow/trunk/TorCtl/PathSupport.py	2007-03-12 21:07:19 UTC (rev 9806)
@@ -237,7 +237,7 @@
             self.idhex = idhex.upper()
 
     def r_is_ok(self, router):
-        return router.idhex.upper() == self.idhex
+        return router.idhex == self.idhex
     
 class MinBWRestriction(NodeRestriction):
     def __init__(self, minbw):
@@ -566,8 +566,9 @@
                 # valid and current (especially for stats)
                 self.routers[r.idhex].update_to(r)
             else:
-                self.routers[r.idhex] = self.RouterClass(r)
-                new_routers.append(self.RouterClass(r))
+                rc = self.RouterClass(r)
+                self.routers[r.idhex] = rc
+                new_routers.append(rc)
         self.sorted_r.extend(new_routers)
         self.sorted_r.sort(lambda x, y: cmp(y.bw, x.bw))
 
@@ -596,7 +597,7 @@
                     except TorCtl.ErrorReply, e:
                         # No need to retry here. We should get the failed
                         # event for either the circ or stream next
-                        plog("NOTICE", "Error attaching stream: "+str(e.args))
+                        plog("WARN", "Error attaching stream: "+str(e.args))
                         return
                     break
         else:
@@ -685,8 +686,14 @@
                 self.attach_stream_any(stream, stream.detached_from)
         elif c.status == "BUILT":
             self.circuits[c.circ_id].built = True
-            for stream in self.circuits[c.circ_id].pending_streams:
-                self.c.attach_stream(stream.sid, c.circ_id)
+            try:
+                for stream in self.circuits[c.circ_id].pending_streams:
+                    self.c.attach_stream(stream.sid, c.circ_id)
+            except TorCtl.ErrorReply, e:
+                # No need to retry here. We should get the failed
+                # event for either the circ or stream next
+                plog("WARN", "Error attaching stream: "+str(e.args))
+                return
 
     def stream_status_event(self, s):
         output = [s.event_name, str(s.strm_id), s.status, str(s.circ_id),

Modified: torflow/trunk/TorCtl/TorCtl.py
===================================================================
--- torflow/trunk/TorCtl/TorCtl.py	2007-03-12 13:04:20 UTC (rev 9805)
+++ torflow/trunk/TorCtl/TorCtl.py	2007-03-12 21:07:19 UTC (rev 9806)
@@ -63,7 +63,7 @@
         self.orport = int(orport)
         self.dirport = int(dirport)
         self.flags = flags
-        self.idhex = (self.idhash + "=").decode("base64").encode("hex")
+        self.idhex = (self.idhash + "=").decode("base64").encode("hex").upper()
         m = re.search(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)", updated)
         self.updated = datetime.datetime(*map(int, m.groups()))
 
@@ -213,10 +213,7 @@
                 return ret
         plog("NOTICE", "No matching exit line for "+self.nickname)
         return False
-    
-    def __eq__(self, other): return self.idhex == other.idhex
-    def __ne__(self, other): return self.idhex != other.idhex
-
+   
 class Circuit:
     def __init__(self):
         self.cid = 0
@@ -526,6 +523,9 @@
         if router != ns.nickname:
             plog("NOTICE", "Got different names " + ns.nickname + " vs " +
                          router + " for " + ns.idhex)
+        # XXX: Compile these regular expressions? This is an expensive process
+        # Use http://docs.python.org/lib/profile.html to verify this is 
+        # the part of startup that is slow
         for line in desc:
             pl = re.search(r"^platform Tor (\S+) on (\S+)", line)
             ac = re.search(r"^accept (\S+):([^-]+)(?:-(\d+))?", line)
@@ -534,7 +534,7 @@
             if re.search(r"^opt hibernating 1", line):
                 #dead = 1 # XXX: Technically this may be stale..
                 if ("Running" in ns.flags):
-                    plog("NOTICE", "Hibernating router "+ns.nickname+" is running..")
+                    plog("INFO", "Hibernating router "+ns.nickname+" is running..")
             if ac:
                 exitpolicy.append(ExitPolicyLine(True, *ac.groups()))
             elif rj:
@@ -544,7 +544,7 @@
             elif pl:
                 version, os = pl.groups()
         if not bw_observed and not dead and ("Valid" in ns.flags):
-            plog("NOTICE", "No bandwidth for live router " + ns.nickname)
+            plog("INFO", "No bandwidth for live router " + ns.nickname)
         return Router(ns.idhex, ns.nickname, bw_observed, dead, exitpolicy,
                 ns.flags, ip, version, os)
 

Modified: torflow/trunk/metatroller.py
===================================================================
--- torflow/trunk/metatroller.py	2007-03-12 13:04:20 UTC (rev 9805)
+++ torflow/trunk/metatroller.py	2007-03-12 21:07:19 UTC (rev 9806)
@@ -55,14 +55,14 @@
     def reset(self):
         self.circ_uncounted = 0
         self.circ_failed = 0
-        self.circ_succeeded = 0
+        self.circ_succeeded = 0 # disjoint from failed (for verification only)
         self.circ_suspected = 0
-        self.circ_selections = 0 # above 4 should add to this
+        self.circ_chosen = 0 # above 4 should add to this
         self.strm_failed = 0 # Only exits should have these
         self.strm_succeeded = 0
-        self.strm_suspected = 0
+        self.strm_suspected = 0 # disjoint from failed (for verification only)
         self.strm_uncounted = 0
-        self.strm_selections = 0 # above 3 should add to this
+        self.strm_chosen = 0 # above 3 should add to this
         self.reason_suspected = {}
         self.reason_failed = {}
         self.first_seen = time.time()
@@ -77,13 +77,53 @@
         self.max_bw = 0
         self.min_bw = 0
         self.avg_bw = 0
-    
+
+    def current_uptime(self):
+        if self.became_active_at:
+            ret = (self.total_active_uptime+(time.time()-self.became_active_at))
+        else:
+            ret = self.total_active_uptime
+        if ret == 0: return 0.000005 # eh..
+        else: return ret
+                
+    def failed_per_hour(self):
+        return (3600.*(self.circ_failed+self.strm_failed))/self.current_uptime()
+
+    def suspected_per_hour(self):
+        return (3600.*(self.circ_suspected+self.strm_suspected
+                  +self.circ_failed+self.strm_failed))/self.current_uptime()
+
+    # These four are for sanity checking
+    def _suspected_per_hour(self):
+        return (3600.*(self.circ_suspected+self.strm_suspected))/self.current_uptime()
+
+    def _uncounted_per_hour(self):
+        return (3600.*(self.circ_uncounted+self.strm_uncounted))/self.current_uptime()
+
+    def _chosen_per_hour(self):
+        return (3600.*(self.circ_chosen+self.strm_chosen))/self.current_uptime()
+
+    def _succeeded_per_hour(self):
+        return (3600.*(self.circ_succeeded+self.strm_succeeded))/self.current_uptime()
+        
+    def __str__(self):
+        return (self.idhex+" ("+self.nickname+")\n\t"
+            +" CF=" +str(self.circ_failed)
+            +" CS="+ str(self.circ_suspected+self.circ_failed)
+            +" CC="+ str(self.circ_chosen)
+            +" SF="+ str(self.strm_failed)
+            +" SS="+ str(self.strm_suspected+self.strm_failed)
+            +" SC="+ str(self.strm_chosen)
+            +" FR="+ str(round(self.failed_per_hour(),2))
+            +" SR="+ str(round(self.suspected_per_hour(),2))
+            +" Up="+str(round(self.current_uptime()/3600, 1))+"h\n")
+
     def sanity_check(self):
         if self.circ_failed + self.circ_succeeded + self.circ_suspected \
-            + self.circ_uncounted != self.circ_selections:
+            + self.circ_uncounted != self.circ_chosen:
             plog("ERROR", self.nickname+" does not add up for circs")
         if self.strm_failed + self.strm_succeeded + self.strm_suspected \
-            + self.strm_uncounted != self.strm_selections:
+            + self.strm_uncounted != self.strm_chosen:
             plog("ERROR", self.nickname+" does not add up for streams")
         def check_reasons(reasons, expected, which, rtype):
             count = 0
@@ -102,17 +142,161 @@
         if self.became_active_at: tot_uptime += now - self.became_active_at
         if round(tot_hib_time+tot_uptime) != round(now-self.first_seen):
             plog("ERROR", "Mismatch of uptimes for "+self.nickname)
+        
+        per_hour_tot = round(self._uncounted_per_hour()+self.failed_per_hour()+
+                 self._suspected_per_hour()+self._succeeded_per_hour(), 2)
+        chosen_tot = round(self._chosen_per_hour(), 2)
+        if per_hour_tot != chosen_tot:
+            plog("ERROR", self.nickname+" has mismatch of per hour counts: "+str(per_hour_tot) +" vs "+str(chosen_tot))
 
+class ReasonRouterList:
+    "Helper class to track which reasons are in which routers."
+    def __init__(self, reason):
+        self.reason = reason
+        self.rlist = {}
+
+    def sort_list(self): raise NotImplemented()
+
+    def write_list(self, f):
+        rlist = self.sort_list()
+        for r in rlist:
+            f.write(r.idhex+" ("+r.nickname+") Fail=")
+            if self.reason in r.reason_failed:
+                f.write(str(r.reason_failed[self.reason]))
+            else: f.write("0")
+            f.write(" Susp=")
+            if self.reason in r.reason_suspected:
+                f.write(str(r.reason_suspected[self.reason])+"\n")
+            else: f.write("0\n")
+        
+    def add_r(self, r):
+        self.rlist[r] = 1
+
+    def total_suspected(self):
+        # suspected is disjoint from failed. The failed table
+        # may not have an entry
+        def notlambda(x, y):
+            if self.reason in y.reason_suspected:
+                if self.reason in y.reason_failed:
+                    return (x + y.reason_suspected[self.reason]
+                             + y.reason_failed[self.reason])
+                else:
+                    return (x + y.reason_suspected[self.reason])
+            else:
+                if self.reason in y.reason_failed:
+                    return (x + y.reason_failed[self.reason])
+                else: return x
+        return reduce(notlambda, self.rlist.iterkeys(), 0)
+
+    def total_failed(self):
+        def notlambda(x, y):
+            if self.reason in y.reason_failed:
+                return (x + y.reason_failed[self.reason])
+            else: return x
+        return reduce(notlambda, self.rlist.iterkeys(), 0)
+ 
+class SuspectRouterList(ReasonRouterList):
+    def __init__(self, reason): ReasonRouterList.__init__(self,reason)
+    
+    def sort_list(self):
+        rlist = self.rlist.keys()
+        rlist.sort(lambda x, y: cmp(y.reason_suspected[self.reason],
+                                    x.reason_suspected[self.reason]))
+        return rlist
+   
+    def _verify_suspected(self):
+        return reduce(lambda x, y: x + y.reason_suspected[self.reason],
+                        self.rlist.iterkeys(), 0)
+
+class FailedRouterList(ReasonRouterList):
+    def __init__(self, reason): ReasonRouterList.__init__(self,reason)
+
+    def sort_list(self):
+        rlist = self.rlist.keys()
+        rlist.sort(lambda x, y: cmp(y.reason_failed[self.reason],
+                                    x.reason_failed[self.reason]))
+        return rlist
+
+    def _verify_failed(self):
+        return reduce(lambda x, y: x + y.reason_failed[self.reason],
+                        self.rlist.iterkeys(), 0)
+
 class StatsHandler(PathSupport.PathBuilder):
     def __init__(self, c, slmgr):
         PathBuilder.__init__(self, c, slmgr, StatsRouter)
+        self.failed_reasons = {}
+        self.suspect_reasons = {}
 
+    def write_reasons(self, f, reasons, name):
+        f.write("\n\n\t------------------- "+name+" -------------------\n")
+        for rsn in reasons:
+            f.write("\nReason="+rsn.reason+". Failed: "+str(rsn.total_failed())
+                    +", Suspected: "+str(rsn.total_suspected())+"\n")
+            rsn.write_list(f)
+
+    def write_routers(self, f, rlist, name):
+        f.write("\n\n\t------------------- "+name+" -------------------\n\n")
+        for r in rlist:
+            f.write(str(r))
+
     def write_stats(self, filename):
-        # FIXME: Sort this by different values.
         plog("DEBUG", "Writing stats")
+        # Sanity check routers
+        # TODO: all sanity checks should be turned off once its stable.
+        for r in self.sorted_r: r.sanity_check()
+
+        # Sanity check the router reason lists.
         for r in self.sorted_r:
-            r.sanity_check()
+            for rsn in r.reason_failed:
+                if r not in self.failed_reasons[rsn].rlist:
+                    plog("ERROR", "Router missing from reason table")
+            for rsn in r.reason_suspected:
+                if r not in self.suspect_reasons[rsn].rlist:
+                    plog("ERROR", "Router missing from reason table")
 
+        # Sanity check the lists the other way
+        for rsn in self.failed_reasons.itervalues(): rsn._verify_failed()
+        for rsn in self.suspect_reasons.itervalues(): rsn._verify_suspected()
+
+        f = file(filename, "w")
+
+        # FIXME: Print out key/legend header
+        failed = copy.copy(self.sorted_r)
+        failed.sort(lambda x, y:
+                  cmp(y.circ_failed+y.strm_failed,
+                        x.circ_failed+x.strm_failed))
+        self.write_routers(f, failed, "Failed Counts")
+
+        suspected = copy.copy(self.sorted_r)
+        suspected.sort(lambda x, y: # Suspected includes failed
+             cmp(y.circ_failed+y.strm_failed+y.circ_suspected+y.strm_suspected,
+                 x.circ_failed+x.strm_failed+x.circ_suspected+x.strm_suspected))
+        self.write_routers(f, suspected, "Suspected Counts")
+
+        fail_rate = copy.copy(failed)
+        fail_rate.sort(lambda x, y:
+             cmp(y.failed_per_hour(), x.failed_per_hour()))
+        self.write_routers(f, fail_rate, "Fail Rates")
+
+        suspect_rate = copy.copy(suspected)
+        suspect_rate.sort(lambda x, y:
+             cmp(y.suspected_per_hour(), x.suspected_per_hour()))
+        self.write_routers(f, suspect_rate, "Suspect Rates")
+
+        # TODO: Sort by failed/selected and suspect/selected ratios
+        # if we ever want to do non-uniform scanning..
+
+        susp_reasons = self.suspect_reasons.values()
+        susp_reasons.sort(lambda x, y:
+             cmp(y.total_suspected(), x.total_suspected()))
+        self.write_reasons(f, susp_reasons, "Suspect Reasons")
+
+        fail_reasons = self.failed_reasons.values()
+        fail_reasons.sort(lambda x, y:
+             cmp(y.total_failed(), x.total_failed()))
+        self.write_reasons(f, fail_reasons, "Failed Reasons")
+        f.close()
+
     def reset_stats(self):
         for r in self.sorted_r:
             r.reset()
@@ -120,11 +304,13 @@
     # TODO: Use stream bandwidth events to implement reputation system
     # from
     # http://www.cs.colorado.edu/department/publications/reports/docs/CU-CS-1025-07.pdf
-    # (though my bet's on it not working on a real Tor network)
+    # aha! the way to detect lying nodes as a client is to test 
+    # their bandwidths in tiers.. only make circuits of nodes of 
+    # the same bandwidth.. Then look for nodes with odd avg bandwidths
 
     def circ_status_event(self, c):
         if c.circ_id in self.circuits:
-            # XXX: Hrmm, consider making this sane in TorCtl.
+            # TODO: Hrmm, consider making this sane in TorCtl.
             if c.reason: lreason = c.reason
             else: lreason = "NONE"
             if c.remote_reason: rreason = c.remote_reason
@@ -132,7 +318,7 @@
             reason = c.event_name+":"+c.status+":"+lreason+":"+rreason
             if c.status == "FAILED":
                 # update selection count
-                for r in self.circuits[c.circ_id].path: r.circ_selections += 1
+                for r in self.circuits[c.circ_id].path: r.circ_chosen += 1
                 
                 if len(c.path)-1 < 0: start_f = 0
                 else: start_f = len(c.path)-1 
@@ -143,6 +329,9 @@
                     if not reason in r.reason_failed:
                         r.reason_failed[reason] = 1
                     else: r.reason_failed[reason]+=1
+                    if reason not in self.failed_reasons:
+                         self.failed_reasons[reason] = FailedRouterList(reason)
+                    self.failed_reasons[reason].add_r(r)
 
                 for r in self.circuits[c.circ_id].path[len(c.path)+1:]:
                     r.circ_uncounted += 1
@@ -154,12 +343,15 @@
                     if not reason in r.reason_suspected:
                         r.reason_suspected[reason] = 1
                     else: r.reason_suspected[reason]+=1
+                    if reason not in self.suspect_reasons:
+                         self.suspect_reasons[reason] = SuspectRouterList(reason)
+                    self.suspect_reasons[reason].add_r(r)
             elif c.status == "CLOSED":
                 # Since PathBuilder deletes the circuit on a failed, 
                 # we only get this for a clean close
-                # Update circ_selections count
+                # Update circ_chosen count
                 for r in self.circuits[c.circ_id].path:
-                    r.circ_selections += 1
+                    r.circ_chosen += 1
                 
                     if lreason in ("REQUESTED", "FINISHED", "ORIGIN"):
                         r.circ_succeeded += 1
@@ -168,11 +360,14 @@
                             r.reason_suspected[reason] = 1
                         else: r.reason_suspected[reason] += 1
                         r.circ_suspected+= 1
+                        if reason not in self.suspect_reasons:
+                            self.suspect_reasons[reason] = SuspectRouterList(reason)
+                        self.suspect_reasons[reason].add_r(r)
         PathBuilder.circ_status_event(self, c)
     
     def stream_status_event(self, s):
         if s.strm_id in self.streams:
-            # XXX: Hrmm, consider making this sane in TorCtl.
+            # TODO: Hrmm, consider making this sane in TorCtl.
             if s.reason: lreason = s.reason
             else: lreason = "NONE"
             if s.remote_reason: rreason = s.remote_reason
@@ -180,18 +375,24 @@
             reason = s.event_name+":"+s.status+":"+lreason+":"+rreason+":"+self.streams[s.strm_id].kind
             if s.status in ("DETACHED", "FAILED", "CLOSED", "SUCCEEDED") \
                     and not s.circ_id:
+                # XXX: REMAPs can do this (normal). Also REASON=DESTROY (bug?)
+                # Also timeouts.. Those should use the pending circ instead
+                # of returning..
                 plog("WARN", "Stream "+str(s.strm_id)+" detached from no circuit!")
                 PathBuilder.stream_status_event(self, s)
                 return
             if s.status == "DETACHED" or s.status == "FAILED":
-                    
-                # Update strm_selections count
-                for r in self.circuits[s.circ_id].path: r.strm_selections += 1
+                # Update strm_chosen count
+                # FIXME: use SENTRESOLVE/SENTCONNECT instead?
+                for r in self.circuits[s.circ_id].path: r.strm_chosen += 1
                 # Update failed count,reason_failed for exit
                 r = self.circuits[s.circ_id].exit
                 if not reason in r.reason_failed: r.reason_failed[reason] = 1
                 else: r.reason_failed[reason]+=1
                 r.strm_failed += 1
+                if reason not in self.failed_reasons:
+                    self.failed_reasons[reason] = FailedRouterList(reason)
+                self.failed_reasons[reason].add_r(r)
 
                 # If reason=timeout, update suspected for all
                 if lreason in ("TIMEOUT", "INTERNAL", "TORPROTOCOL", "DESTROY"):
@@ -200,19 +401,28 @@
                         if not reason in r.reason_suspected:
                             r.reason_suspected[reason] = 1
                         else: r.reason_suspected[reason]+=1
+                        if reason not in self.suspect_reasons:
+                            self.suspect_reasons[reason] = SuspectRouterList(reason)
+                        self.suspect_reasons[reason].add_r(r)
                 else:
                     for r in self.circuits[s.circ_id].path[:-1]:
                         r.strm_uncounted += 1
             elif s.status == "CLOSED":
                 # Always get both a closed and a failed.. 
                 #   - Check if the circuit exists still
+                # XXX: Save both closed and failed reason in stream object
                 if s.circ_id in self.circuits:
+                    # Update strm_chosen count
+                    for r in self.circuits[s.circ_id].path: r.strm_chosen += 1
                     if lreason in ("TIMEOUT", "INTERNAL", "TORPROTOCOL" "DESTROY"):
                         for r in self.circuits[s.circ_id].path[:-1]:
                             r.strm_suspected += 1
                             if not reason in r.reason_suspected:
                                 r.reason_suspected[reason] = 1
                             else: r.reason_suspected[reason]+=1
+                            if reason not in self.suspect_reasons:
+                                self.suspect_reasons[reason] = SuspectRouterList(reason)
+                            self.suspect_reasons[reason].add_r(r)
                     else:
                         for r in self.circuits[s.circ_id].path[:-1]:
                             r.strm_uncounted += 1
@@ -225,10 +435,9 @@
                             r.reason_failed[reason] = 1
                         else: r.reason_failed[reason]+=1
                         r.strm_failed += 1
-            elif s.status == "SUCCEEDED":
-                # Update strm_selections count
-                # XXX: use SENTRESOLVE/SENTCONNECT instead?
-                for r in self.circuits[s.circ_id].path: r.strm_selections += 1
+                        if reason not in self.failed_reasons:
+                            self.failed_reasons[reason] = FailedRouterList(reason)
+                        self.failed_reasons[reason].add_r(r)
         PathBuilder.stream_status_event(self, s)
 
     def ns_event(self, n):
@@ -273,7 +482,7 @@
             # http://effbot.org/pyfaq/what-kinds-of-global-value-mutation-are-thread-safe.htm
             le = h.last_exit
             if le:
-                s.write("250 LASTEXIT=$"+le.idhex.upper()+" ("+le.nickname+") OK\r\n")
+                s.write("250 LASTEXIT=$"+le.idhex+" ("+le.nickname+") OK\r\n")
             else:
                 s.write("250 LASTEXIT=0 (0) OK\r\n")
         elif command == "NEWEXIT" or command == "NEWNYM":
@@ -363,7 +572,7 @@
                 s.write("510 Integer expected\r\n")
         elif command == "SETEXIT":
             if arg:
-                # XXX: Hrmm.. if teh user is a dumbass, this will fail silently
+                # FIXME: Hrmm.. if teh user is a dumbass this will fail silently
                 def notlambda(sm): sm.exit_name=arg
                 h.schedule_selmgr(notlambda)
                 s.write("250 OK\r\n")