[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r15158: Add a new component for extracting lots of different statistics (in projects: . dir-stats dir-stats/trunk)
Author: nickm
Date: 2008-06-11 22:40:22 -0400 (Wed, 11 Jun 2008)
New Revision: 15158
Added:
projects/dir-stats/
projects/dir-stats/branches/
projects/dir-stats/tags/
projects/dir-stats/trunk/
projects/dir-stats/trunk/LICENSE
projects/dir-stats/trunk/routerstats.py
Log:
Add a new component for extracting lots of different statistics from a
directory at once.
Added: projects/dir-stats/trunk/LICENSE
===================================================================
--- projects/dir-stats/trunk/LICENSE (rev 0)
+++ projects/dir-stats/trunk/LICENSE 2008-06-12 02:40:22 UTC (rev 15158)
@@ -0,0 +1,31 @@
+routerstats.py is distributed under this license:
+
+Copyright (c) 2008, The Tor Project, Inc.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+ * Neither the names of the copyright owners nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Added: projects/dir-stats/trunk/routerstats.py
===================================================================
--- projects/dir-stats/trunk/routerstats.py (rev 0)
+++ projects/dir-stats/trunk/routerstats.py 2008-06-12 02:40:22 UTC (rev 15158)
@@ -0,0 +1,333 @@
+#!/usr/bin/python
+# routerstats.py
+# Copyright 2008 The Tor Project, Inc
+# For license information, see LICENSE
+# For usage information, run routerstats.py --help
+
+import binascii
+import re
+import time, calendar
+import sys
+
def parseTime(t):
    """Convert a "YYYY-MM-DD HH:MM:SS" string to seconds since the epoch.

    The parsed fields are passed to calendar.timegm(), so they are read
    as UTC rather than local time.  time.strptime() raises ValueError
    when `t` does not match the expected layout.
    """
    return calendar.timegm(time.strptime(t, "%Y-%m-%d %H:%M:%S"))
+
def parseHistory(line):
    """Summarize the payload of a read-history / write-history line.

    `line` is the text that follows the keyword; the parser expects
    whitespace-separated fields shaped like

        YYYY-MM-DD HH:MM:SS (NSEC s) n1,n2,n3,...

    where NSEC is the interval length in seconds and each n is the
    counter for one interval.

    Returns a tuple (minimum, median, maximum, latest), or NIL_HIST when
    the line carries no counters.  NOTE: minimum, median and maximum are
    the raw per-interval counters, while latest is the newest retained
    counter divided by NSEC (i.e. a per-second figure).

    Raises IndexError when fewer than three fields are present,
    ValueError when a numeric field cannot be parsed, and
    ZeroDivisionError when NSEC is 0.
    """
    fields = line.split()
    # fields[2] looks like "(900": drop the leading character to get NSEC.
    secPerChunk = int(fields[2][1:])
    if len(fields) >= 5:
        counts = [int(v) for v in fields[4].split(",")]
    else:
        counts = []

    if not counts:
        return NIL_HIST
    if len(counts) > 1:
        # The newest entry is discarded whenever an older one exists --
        # presumably because the last interval may still be incomplete
        # (TODO confirm against the directory spec).
        counts = counts[:-1]

    bwLatest = counts[-1] // secPerChunk
    ordered = sorted(counts)
    return ordered[0], ordered[len(ordered) // 2], ordered[-1], bwLatest
+
class StatSummary:
    """Order statistics over a collection of numbers; None entries are skipped.

    Attributes:
      n        -- number of non-None items.
      minimum, median, p75, p90, p95, p99, maximum
               -- elements picked from the sorted data at index 0,
                  n//2, int(n*.75), int(n*.9), int(n*.95), int(n*.99)
                  and -1 respectively.
      total    -- sum of the items.
      mean     -- total / n, as a float.

    When there are no items (n == 0) every attribute other than `n` is
    None.
    """

    def __init__(self, items):
        s = sorted(i for i in items if i is not None)
        n = self.n = len(s)
        # Give every statistic a defined value up front, so that an empty
        # summary can be inspected without raising AttributeError.
        self.minimum = self.median = self.maximum = None
        self.p75 = self.p90 = self.p95 = self.p99 = None
        self.total = self.mean = None
        if n == 0:
            return
        self.median = s[n//2]
        self.p75 = s[int(n*.75)]
        self.p90 = s[int(n*.9)]
        self.p95 = s[int(n*.95)]
        self.p99 = s[int(n*.99)]
        self.total = sum(s)
        self.mean = self.total/float(n)
        self.minimum = s[0]
        self.maximum = s[-1]

    def __str__(self):
        """Return a one-line, tab-separated rendering ("N=0" when empty).

        All values are formatted with %d, so the mean is shown truncated
        to an integer.
        """
        if not self.n:
            return "N=0"
        return ("N=%d\t"
                "mean=%d\t"
                "total=%d\t"
                "min=%d\t50pct=%d\t75pct=%d\t"
                "90pct=%d\t95pct=%d\t99pct=%d\tmax=%d")%(
            self.n, self.mean, self.total,
            self.minimum,self.median,self.p75,
            self.p90,self.p95,self.p99,self.maximum)
+
# History summary used when nothing is known about a router's traffic:
# (minimum, median, maximum, latest), all None.
NIL_HIST = (None, None, None, None)

class Router(object):
    """Everything we track about one router.

    Attributes set by the constructor:
      orport, dirport -- port numbers from the descriptor's "router" line
                         (dirport is None when the caller did not give one).
      ip, ident, published, bw, uptime -- stored exactly as passed in.
    Attributes filled in later:
      listed, running, stable, guard, exit -- see setFlags(); until then
                         listed and running are False and the others None.
      readHist, writeHist -- see setHistory(); NIL_HIST until then.
    """
    # We're going to make a lot of these.  __slots__ tells python that it
    # can be real efficient about storage.
    __slots__ = [ 'orport', 'ident', 'published', 'bw', 'uptime',
                  'listed', 'running', 'stable', 'guard', 'exit',
                  'ip', 'readHist', 'writeHist', 'dirport' ]

    def __init__(self, orport, ip, ident, published, bw, uptime, dirport=None):
        self.orport = orport
        self.dirport = dirport
        self.ip = ip
        self.ident = ident
        self.published = published
        self.bw = bw
        # Keep the value we were given (it used to be overwritten with False).
        self.uptime = uptime
        self.listed = False
        self.running = False
        self.stable = None
        self.guard = None
        self.exit = None
        self.readHist = NIL_HIST
        self.writeHist = NIL_HIST

    def setFlags(self, flags):
        """Mark the router as listed and record which of the flags
        Running, Stable, Guard and Exit appear in `flags`."""
        self.listed = True
        self.running = ('Running' in flags)
        self.stable = ('Stable' in flags)
        self.guard = ('Guard' in flags)
        self.exit = ('Exit' in flags)

    def setHistory(self, readData, writeData):
        """Parse and store read/write history lines.

        A falsy argument (None or "") leaves the corresponding summary
        untouched.
        """
        if readData:
            self.readHist = parseHistory(readData)
        if writeData:
            self.writeHist = parseHistory(writeData)

class Stats:
    """A collection of Router objects, keyed by binary identity digest,
    built up by parsing descriptor, extra-info and consensus files."""

    def __init__(self):
        self.routers = { }

    def parseDescFile(self, fname):
        """Read the router descriptors in `fname` and add each one with
        addRouter() when its "router-signature" line is reached."""
        purpose = orport = dirport = ip = ident = published = None
        bw = uptime = bwRead = bwWrite = None

        # NOTE: the "with" statement needs Python 2.6 or later.
        with open(fname, 'r') as f:
            for line in f:
                line = line.strip()
                if line.startswith("opt "): line = line[4:]
                if line.startswith("@purpose "):
                    purpose = line.split()[1]

                if line.startswith("router "):
                    items = line.split()
                    ip = items[2]
                    orport = int(items[3])
                    dirport = int(items[5])
                elif line.startswith("fingerprint "):
                    ident = line[12:].replace(" ","")
                elif line.startswith("published "):
                    published = line[10:]
                elif line.startswith("bandwidth "):
                    # Third token of the line, i.e. the second number.
                    bw = int(line.split()[2])
                elif line.startswith("uptime "):
                    uptime = int(line.split()[1])
                elif line.startswith("read-history "):
                    bwRead = line[13:]
                elif line.startswith("write-history "):
                    bwWrite = line[14:]
                elif line.startswith("router-signature"):
                    self.addRouter(purpose, orport, ip, ident, published,
                                   bw, uptime, bwRead, bwWrite, dirport)
                    # Start from a clean slate for the next descriptor.
                    purpose = orport = dirport = ip = ident = published = None
                    bw = uptime = bwRead = bwWrite = None

    def parseEIFile(self, fname):
        """Read extra-info documents from `fname`.

        A document's history lines are applied only to an already-known
        router whose stored publication time equals the document's, and
        only when the document has a read-history line.
        """
        ident = published = bwRead = bwWrite = None
        with open(fname, 'r') as f:
            for line in f:
                line = line.strip()
                if line.startswith("opt "): line = line[4:]
                if line.startswith("extra-info "):
                    ident = binascii.a2b_hex(line.split()[2])
                elif line.startswith("published "):
                    published = parseTime(line[10:])
                elif line.startswith("read-history "):
                    bwRead = line[13:]
                elif line.startswith("write-history "):
                    bwWrite = line[14:]
                elif line.startswith("router-signature"):
                    r = self.routers.get(ident)
                    if (r is not None and r.published == published
                            and bwRead is not None):
                        r.setHistory(bwRead, bwWrite)
                    ident = published = bwRead = bwWrite = None

    def parseConsensus(self, fname):
        """Read "r"/"s" line pairs from the consensus in `fname` and call
        setFlags() on every router we already know about."""
        ident = None
        with open(fname, 'r') as f:
            for line in f:
                if line.startswith("r "):
                    # The identity field is base64 without its "=" padding.
                    ident = binascii.a2b_base64(line.split()[2]+"=")
                elif line.startswith("s "):
                    flags = line[2:].strip().split()
                    r = self.routers.get(ident)
                    if r is not None:
                        r.setFlags(flags)
                    ident = None

    def addRouter(self, purpose, orport, ip, ident, published, bw, uptime,
                  bwRead, bwWrite, dirport=None):
        """Record one parsed descriptor.

        Descriptors whose purpose is set and is not "general" are ignored,
        as are descriptors older than the one already stored for the same
        identity.  `ident` is a hex string and `published` a
        "YYYY-MM-DD HH:MM:SS" string; both are converted before storing.

        Raises AssertionError when uptime, orport, ip or ident is missing
        or when the identity is not 20 bytes long.
        """
        if purpose and purpose != "general":
            return
        assert uptime is not None
        assert orport is not None
        assert ip is not None
        assert ident is not None

        ident = binascii.a2b_hex(ident)
        published = parseTime(published)
        assert len(ident) == 20

        known = self.routers.get(ident)
        if known is not None and published < known.published:
            return

        # The history is attached only to the router created here, so
        # `router` is always bound when setHistory() is called.
        router = Router(orport, ip, ident, published, bw, uptime, dirport)
        router.setHistory(bwRead, bwWrite)
        self.routers[ident] = router

    def getStats(self, statFn, predFn, mode="summary"):
        """Apply statFn to every router accepted by predFn.

        Returns a StatSummary when mode is 'summary', or the plain list
        of values when mode is 'raw'.
        """
        if mode == 'summary':
            return StatSummary(statFn(r) for r in self.routers.values() if predFn(r))
        else:
            assert mode == 'raw'
            return [ statFn(r) for r in self.routers.values() if predFn(r) ]
+
+
def _portFactory(portStr):
    """Return a predicate that accepts routers whose orport equals
    int(portStr).  Raises ValueError if portStr is not an integer."""
    p = int(portStr)
    return lambda r: r.orport == p

##def _versionFactory(versionStr):

# Parameterized predicates: name -> function taking the text after the
# "-" on the command line and returning a router predicate.
PRED_FACTORIES = {
    "port" : _portFactory,
##  "version" : _versionFactory,
}

# Fixed predicates: name -> function(router) -> truth value.
PREDICATES = {
    "running" : lambda r: r.running,
    "listed" : lambda r: r.listed,
    "stable" : lambda r: r.stable,
    "guard" : lambda r: r.guard,
    "exit" : lambda r: r.exit,
    # Tolerate router objects that carry no dirport attribute at all:
    # they simply do not count as directory servers.
    "dir" : lambda r: getattr(r, "dirport", None) not in (0,None),
    "all": lambda r: True,
    }

# Statistics: name -> function(router) -> value (None when unknown).
# Index 3 of a history summary is its "latest" entry.
STATS = {
    "capacity" : lambda r: r.bw,
    "reading" : lambda r: r.readHist[3],
    "writing" : lambda r: r.writeHist[3],
}
+
def usage(code=0):
    """Write the command-line help to stderr, then exit with status `code`.

    The lists of predicates and stats are generated from PREDICATES,
    PRED_FACTORIES and STATS (sorted, so the help text is stable).
    """
    sys.stderr.write("""%s [-p predicate]... [-s stat]...[--raw]...<-d datadir>
    Recognized predicates are: %s %s
    Predicates can be joined with commas
    Recognized stats are: %s
       capacity -> declared capacity
       reading,writing -> most recent actual bytes-per-sec over last 15 minutes
""" % (sys.argv[0], " ".join(sorted(PREDICATES)),
       " ".join("%s-XXX"%p for p in sorted(PRED_FACTORIES)),
       " ".join(sorted(STATS))))
    sys.exit(code)
+
+
+if __name__ == '__main__':
+ import os, getopt
+
+ stats = [ ]
+ predicates = [ ]
+ datadir = None
+ mode = "summary"
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "p:s:d:h", ["help","raw"])
+ except getopt.GetoptError, e:
+ print e
+ usage()
+ for k,v in opts:
+ if k in ('-h', '--help'):
+ usage(0)
+ elif k == '-p':
+ subparts = []
+ for part in re.split(r'[:,]', v):
+ if "-" in part:
+ factname, factarg = part.split("-", 1)
+ try:
+ factory = PRED_FACTORIES[factname]
+ except KeyError:
+ print "Unrecognized predicate: %r"%part
+ subparts.append( factory(factarg) )
+ else:
+ try:
+ subparts.append( PREDICATES[part] )
+ except KeyError:
+ print "Unrecognized predicate: %r"%part
+ usage()
+ if len(subparts) == 1:
+ predicates.append((v, subparts[0]))
+ else:
+ def fn(r):
+ for p in subparts:
+ if not p(r):
+ return False
+ return True
+ predicates.append((v, fn))
+ elif k == '-s':
+ try:
+ stats.append( (v, STATS[v]) )
+ except KeyError:
+ print "Unrecognized stat: %r"%v
+ usage()
+ elif k == '-d':
+ datadir = v
+ elif k == '--raw':
+ mode="raw"
+ else:
+ raise Exception(k)
+ if not datadir:
+ sys.stderr.write("No datadir given. defaulting to ~/.tor\n")
+ datadir = os.path.expanduser("~/.tor")
+ if not os.path.exists(datadir):
+ sys.stderr.write("No such path: %r"%datadir)
+ if not predicates:
+ predicates.append( ("all", PREDICATES["all"]) )
+ if not stats:
+ stats.append( ("bandwidth", STATS["capacity"]) )
+
+ s = Stats()
+ for new in "", ".new":
+ fname = os.path.join(datadir, "cached-descriptors"+new)
+ if os.path.exists(fname):
+ #print "Parsing %s"%fname
+ s.parseDescFile(fname)
+ for new in "", ".new":
+ fname = os.path.join(datadir, "cached-extrainfo"+new)
+ if os.path.exists(fname):
+ #print "Parsing %s"%fname
+ s.parseEIFile(fname)
+ fname = os.path.join(datadir, "cached-consensus")
+ if os.path.exists(fname):
+ #print "Parsing %s"%fname
+ s.parseConsensus(fname)
+
+ print "%s routers parsed"%len(s.routers)
+ for predName, pred in predicates:
+ for statName, stat in stats:
+ print "%s: %s: %s" % (predName, statName,
+ s.getStats(statFn=stat, predFn=pred,
+ mode=mode))
Property changes on: projects/dir-stats/trunk/routerstats.py
___________________________________________________________________
Name: svn:executable
+