[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r15347: Aha! It is paretto. Should have tried this first. Just didn' (torflow/branches/gsoc2008/tools/BTAnalysis)
Author: mikeperry
Date: 2008-06-18 03:29:15 -0400 (Wed, 18 Jun 2008)
New Revision: 15347
Modified:
torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
Log:
Aha! It is paretto. Should have tried this first. Just didn't
expect latency to match the bandwidth distribution.
Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
===================================================================
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-18 05:35:19 UTC (rev 15346)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py 2008-06-18 07:29:15 UTC (rev 15347)
@@ -41,21 +41,17 @@
return values[(len(values) - 1)/2]
else:
return 0.0
- def mode(self):
+
+ def mode(self): # Requires makehistogram runs first
counts = {}
greatest = 0
greatest_val = 0
- for v in self.values:
- if v not in counts.keys():
- counts[v] = 1
- else:
- counts[v] += 1
- if counts[v] > greatest:
+ for v in self.buckets.keys():
+ if self.buckets[v] > greatest:
greatest_val = v
- greatest = counts[v]
+ greatest = self.buckets[v]
return greatest_val
-
def makehistogram(self,res,ncircuits,histname):
res = res /1000.0 # convert ms to s
values = copy.copy(self.values)
@@ -71,19 +67,45 @@
i += 1
count = 0
f = open(histname,'w')
- f.write('#build time <\t#circuits\n')
+ f.write('#build time <\t#circuits\n')
sortedkeys = self.buckets.keys()
sortedkeys.sort()
for b in sortedkeys:
towrite = str(b) + '\t' + str(self.buckets[b]) + '\n'
f.write(towrite)
f.close()
+
+ def paretoK(self, Xm):
+ n = 0
+ log_sum = 0
+ for x in self.values:
+ if x < Xm: continue
+ n += 1
+ log_sum += math.log(x)
+ return n/(log_sum - n*math.log(Xm))
+ # Calculate the mean beyond a mode value
+ def modeMean(self, Xm):
+ n = 0
+ tot = 0
+ for x in self.values:
+ if x < Xm: continue
+ n += 1
+ tot += x
+ return tot/n
+
+ def modeN(self, Xm):
+ n = 0
+ for x in self.values:
+ if x < Xm: continue
+ n += 1
+ return n
+
def maxlikelihood(self,k):
# theta estimator for gamma PDF
# maxlikelihood estimator
# theta = sum(values) / N*k
- return sum(self.values * 10)/(k * len(self.values))
+ return 10*sum(self.values)/(k * len(self.values))
def bayesian(self,k):
# bayesian estimator for gamma PDF
@@ -105,6 +127,27 @@
ps = fname + '(x) = '+str(N)+'*((x**' + str(k-1) + ')*(' +str(B**k)+ ')*(exp(-' + str(B) +'*x)))' +'/gamma('+str(k)+')\n'
return ps
+
+def pareto(k,Xm,N,fname):
+ # gnuplot string for shifted, normalized exponential PDF
+ # g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
+ ps = fname+'(x)=(x<'+str(Xm)+') ? 0 : ('+str(N*k*(Xm**k))+'/x**('+str(k+1)+'))\n'
+ return ps
+
+def exp(mean,shift,N,fname):
+ # gnuplot string for normalized exponential PDF
+ # g(x,k,B) = N * l*exp(-l*(x-shift))
+ l = 1.0/mean
+ ps = fname+'(x)=(x<'+str(shift)+')?0:('+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+'))))\n'
+ return ps
+
+def shiftedExp(mean,shift,N,fname):
+ # gnuplot string for shifted, normalized exponential PDF
+ # g(x,k,B) = N * l*exp(-l*(x-shift))/(1+(1-exp(-l*shift)))
+ l = 1.0/mean
+ ps = fname+'(x)='+str(N*l)+'*exp(-abs('+str(l)+'*(x-'+str(shift)+')))/(1+(1-exp(-'+str(l*shift)+')))\n'
+ return ps
+
def poisson(u,N,fname):
ps = fname + "(x) = " + str(N) + "*(" + str(u) + "**x)*exp(-"+str(u)+")/gamma(x + 1)\n"
return ps
@@ -139,7 +182,7 @@
usage()
elif o == '-f': filename = a
elif o == '-d': dirname = a
- elif o == '-k': k = int(a)
+ elif o == '-k': k = float(a)
else:
assert False, "Bad option"
return ncircuits,filename,dirname,k
@@ -158,7 +201,7 @@
print 'Shuffling...',
newfile = dirname + '/' + filename + '.' + ncircuits
- cmd = '/usr/local/bin/sort -R ' + filename + ' | head -n ' + ncircuits + ' > ' + newfile
+ cmd = 'sort -R ' + filename + ' | head -n ' + ncircuits + ' > ' + newfile
p = popen2.Popen4(cmd)
p.wait()
@@ -167,12 +210,17 @@
# create histogram from file
print 'Calculating statistics and creating histogram...',
s = Stats(newfile)
+ s.makehistogram(100,newfile,newfile + '.hist')
mean = s.mean()
stddev = s.stddev()
median = s.median()
- mode = s.mode()
- s.makehistogram(100,newfile,newfile + '.hist')
- print 'Done'
+ mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+ parK = s.paretoK(mode)
+ modeN = s.modeN(mode)
+ modeMean = s.modeMean(mode)
+ print 'Done.'
+ print 'Mean: '+str(mean)+', mode: '+str(mode)
+ print 'ParK: '+str(parK)
# get stats
@@ -202,17 +250,18 @@
#N = s.buckets[i] # num. circuits that have buildtimes
#close to mean/median/mode from histogram
- plotstr += gamma(k,maxliketheta,N, 'maxl')
- plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
- plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+# plotstr += gamma(k,maxliketheta,N, 'maxl')
+# plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
+# plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
+ plotstr += pareto(parK,mode*10,modeN,'pareto')
+ plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
+
plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
- plotstr += "maxl(x) title '" + "max. likelihood', \\\n"
- plotstr += "bayplus(x) title '" + "bayesian theta +', \\\n"
- plotstr += "bayminus(x) title '" + "bayesian theta -' \n"
+ plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
+ plotstr += "expShifted(x) title '" + "Shifted Exp' \n"
-
f = open(plotname,'w')
f.write(plotstr)
@@ -221,8 +270,8 @@
# plot the file
print 'Plotting...',
-# p = popen2.Popen4('gnuplot ' + plotname)
- p = popen2.Popen4('gp4.2 ' + plotname)
+ p = popen2.Popen4('gnuplot ' + plotname)
+# p = popen2.Popen4('gp4.2 ' + plotname)
p.wait()
for err in p.fromchild: