-Fixed (?) x-axis problem: x-axis of graph and histogram indices are now in ms (torflow/branches/gsoc2008/tools/BTAnalysis)

Author: fallon
Date: 2008-07-03 06:32:23 -0400 (Thu, 03 Jul 2008)
New Revision: 15623

-Fixed (?) x-axis problem:
  x-axis of graph is now in ms
  histogram indices are now in ms
  Stats.values now in ms (buildtimes are in s, so we multiply by 1000)
-pareto is off (far lower than the curve), though it seems to be more in line with the mode

Modified: torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py
--- torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-07-03 10:25:01 UTC (rev 15622)
+++ torflow/branches/gsoc2008/tools/BTAnalysis/shufflebt.py	2008-07-03 10:32:23 UTC (rev 15623)
@@ -17,7 +17,7 @@
     self.values = []
     for line in self.f:
       line = line.split('\t')
-      self.values += [float(line[1])]
+      self.values += [float(line[1]) * 1000]
     self.buckets = {}
@@ -47,17 +47,17 @@
   def mode(self): # Requires makehistogram runs first
     counts = {}
-    greatest = 0
     greatest_val = 0
+    greatest_idx = 0
     for v in self.buckets.keys():
-      if self.buckets[v] > greatest:
-        greatest_val = v
-        greatest = self.buckets[v]
-    return greatest_val
+      if self.buckets[v] > greatest_val:
+        greatest_idx = v
+        greatest_val = self.buckets[v]
+    return greatest_idx
   # XXX: This doesn't seem to work for small #s of circuits  
-  def makehistogram(self,res,ncircuits,histname):
-    res = res /1000.0 # convert ms to s
+  def makehistogram(self,res,histname):
+    #res = res /1000.0 # convert ms to s
     values = copy.copy(self.values) 
     count = 0
@@ -67,7 +67,8 @@
       if v < res * i: count += 1
         count += 1
-        self.buckets[int(res * i * 10)] = count
+        self.buckets[int(res * i)] = count
+        #self.buckets[int(res * i * 10)] = count
         i += 1
         count = 0
     f = open(histname,'w')
@@ -136,6 +137,7 @@
   # gnuplot string for shifted, normalized exponential PDF
   # g(x,k,B) = (N * k*(Xm**k)/x**(k+1)))
   ps = fname+'(x)=(x<'+str(Xm)+') ? 0 : ('+str(N*k*(Xm**k))+'/x**('+str(k+1)+'))\n'
+  #ps = fname+'(x)='+str(N*k*(Xm**k))+'/x**('+str(k+1)+')\n'
   return ps
 def exp(mean,shift,N,fname):
@@ -165,6 +167,24 @@
   print "usage: shufflebt.py [-n <number of circuits>] [-s] [-g] [-k <k value>] [-d outdirname] [-r <res in ms>] <list of filenames>"
+def intermediate_filename(infile,shuffle,truncate,outdir):
+  if not shuffle and not truncate: return os.path.abspath(infile)
+  intermediate = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
+  if truncate: intermediate.append(str(truncate))
+  if shuffle:
+    intermediate.append('shuffled')
+  return '.'.join(intermediate)
+def histogram_basefilename(infile,shuffle,truncate,res,outdir):
+  name = [os.path.join(os.path.abspath(outdir),os.path.basename(infile))]
+  if truncate: name.append(str(truncate))
+  if shuffle: name.append('shuffled')
+  name.append('res' + str(res))
+  return '.'.join(name)
 def getargs():
   # [-n <truncate to # circuits>] [-s] <list of filenames>
   k = 3
@@ -235,30 +255,33 @@
   for filename in filenames:
     print 'Processing',filename
+    print '------------------------------'
     if not os.path.exists(filename):
       print filename,'is not a valid path'
-    if truncate and sort or truncate and not sort:
-      newfile = os.path.join(dirname, os.path.basename(filename) + '.' + truncate + '.shuffled')
-    elif sort and not truncate:
-      newfile = os.path.join(dirname , os.path.basename(filename) + '.shuffled')
-    else:
-      newfile =  filename 
-    print newfile
+#    if truncate and sort or truncate and not sort:
+#      newfile = os.path.join(dirname, os.path.basename(filename) + '.' + truncate + '.shuffled')
+#    elif sort and not truncate:
+#      newfile = os.path.join(dirname , os.path.basename(filename) + '.shuffled')
+#    else:
+#      newfile =  filename 
+    newfile = intermediate_filename(filename,sort,truncate,dirname)
     # shuffle, create new file
     # create histogram from file
     s = Stats(newfile)
-    if not sort and not truncate:
-      histfilename = os.path.join(dirname ,os.path.basename(newfile )+ '.res' + str(res) +  '.hist')
-    else:
-      histfilename = newfile + '.res' + str(res) +'.hist'
-    s.makehistogram(res,newfile,histfilename)
+    histfilename = histogram_basefilename(filename,sort,truncate,res,dirname)
+#    if not sort and not truncate:
+#      histfilename = os.path.join(dirname ,os.path.basename(newfile )+ '.res' + str(res) +  '.hist')
+#    else:
+#      histfilename = newfile + '.res' + str(res) +'.hist'
+    s.makehistogram(res,histfilename + '.hist')
     mean = s.mean()
     stddev = s.stddev()
     median = s.median()
-    mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+    #mode = s.mode()/10.0 # relies on s.makehistogram for buckets
+    mode = s.mode() # relies on s.makehistogram for buckets
     parK = s.paretoK(mode)
     modeN = s.modeN(mode)
     modeMean = s.modeMean(mode)
@@ -270,14 +293,16 @@
     if graph:
       # create gnuplot file
-      if not sort and not truncate:
-        newfile =  os.path.join(dirname, newfile)
-      plotname =  newfile + '.plt'
+#      if not sort and not truncate:
+#        newfile =  os.path.join(dirname, newfile)
+#      plotname =  newfile + '.plt'
+      plotname = histfilename + '.plt'
       ncircuits = str(len(s.values))
-      plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + newfile + ".png'\nset style fill  solid 1.00 border -1\nset style histogram clustered gap 1 title  offset character 0, 0, 0\nset datafile missing '-'\nset style data histograms\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (in " + str(res) + " ms)'\n"
-      plotstr += "set label 'std dev=" + str(stddev) + "' at 170,15\n"
+      xtics =  max(s.values) / 10.0
+      plotstr = "set terminal png transparent nocrop enhanced size 800,600\nset output '" + histfilename + ".png'\nset style fill  solid 1.00 border -1\nset style histogram clustered gap 1 title  offset character 0, 0, 0\nset datafile missing '-'\nset title 'Buildtime Distribution Function for "+ ncircuits +" Circuits k=" + str(k) + "\nset ylabel '# Circuits'\nset xlabel 'time (ms)'\nset xtics " + str(xtics) + " \n"
+      plotstr += "set label 'std dev=" + str(stddev) + "' at 25000,100\n"
       # FIXME: Hrmm... http://en.wikipedia.org/wiki/Skewness? Seems like a hack
       # Or better: http://en.wikipedia.org/wiki/Gamma_distribution with k=3?
       # Would make sense if this is the sum of 3 paretos for the individual
@@ -301,11 +326,13 @@
     #  plotstr += gamma(k,baytheta[0],N,'bayplus') # + stddev
     #  plotstr += gamma(k,baytheta[1],N,'bayminus') # - stddev
-      plotstr += pareto(parK,mode*10,modeN,'pareto')
+      plotstr += pareto(parK,mode,modeN*10,'pareto')
       plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
+      #plotstr += pareto(parK,mode*10,modeN,'pareto')
+      #plotstr += exp(modeMean*10,mode*10,modeN,'expShifted')
    #   plotstr += "plot '" + newfile + ".hist' using 2,\\\n"
-      plotstr += "plot '" + histfilename + "' using 2,\\\n"
+      plotstr += "plot '" + histfilename + ".hist' using 1:2 with boxes,\\\n"
       plotstr += "pareto(x) title '" + "Shifted Pareto', \\\n"
       plotstr += "expShifted(x) title '" + "Shifted Exp' \n"