[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Add #6232 code written by gsathya and others.
commit e456caea6134de661d24be8a253394e323ac025a
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Jul 4 23:11:06 2012 +0200
Add #6232 code written by gsathya and others.
Extract consensus weights from consensuses and calculate the Shannon
Entropy for them. Most of this code was written by gsathya and later
refined by asn and phw.
---
task-6232/.gitignore | 3 ++
task-6232/plot-entropy.R | 10 +++++
task-6232/pyentropy.py | 79 ++++++++++++++++++++++++++++++++++++++++++++
task-6232/run-pyentropy.py | 5 +++
4 files changed, 97 insertions(+), 0 deletions(-)
diff --git a/task-6232/.gitignore b/task-6232/.gitignore
new file mode 100644
index 0000000..8a0c627
--- /dev/null
+++ b/task-6232/.gitignore
@@ -0,0 +1,3 @@
+in/
+entropy.csv
+
diff --git a/task-6232/plot-entropy.R b/task-6232/plot-entropy.R
new file mode 100644
index 0000000..1334b88
--- /dev/null
+++ b/task-6232/plot-entropy.R
@@ -0,0 +1,10 @@
+library(ggplot2)
+d <- read.csv("entropy.csv", header = FALSE,
+ col.names = c("validafter", "entropy"))
+ggplot(d, aes(x = as.POSIXct(validafter), y = entropy)) +
+geom_line() +
+scale_x_datetime(name = "\nDate") +
+scale_y_continuous(name = "Entropy\n")
+ggsave("entropy.png", width = 8, height = 6, dpi = 100)
+
+
diff --git a/task-6232/pyentropy.py b/task-6232/pyentropy.py
new file mode 100644
index 0000000..f13f709
--- /dev/null
+++ b/task-6232/pyentropy.py
@@ -0,0 +1,79 @@
+"""
+Usage - python pyentropy.py <consensus-dir> <output-file>
+Output - A CSV file of the format <valid-after>,<entropy>
+Fetch recent input data with: rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
+"""
+
+import sys
+import math
+import os
+from decimal import *
+
+RESULTS = []
+KEYS = ['r','s','v','w','p','m']
+
+
+class Router:
+ def __init__(self):
+ self.lines = []
+ self.nick = None
+ self.bandwidth = None
+ self.flags = None
+ self.probability = None
+
+ def add(self, key, values):
+ if key == 'r':
+ self.nick = values[0]
+ if key == 'w':
+ self.bandwidth = int(values[0].split('=')[1])
+ if key == 's':
+ self.flags = values
+
+
+def run(file_name):
+ routers = []
+ # parse consensus
+ with open(file_name, 'r') as f:
+ for line in f.readlines():
+ key = line.split()[0]
+ values = line.split()[1:]
+ if key =='r':
+ router = Router()
+ router.add(key, values)
+ elif key == 'p':
+ router.add(key, values)
+ routers.append(router)
+ elif key == 'valid-after':
+ valid_after = ' '.join(values)
+ elif key in KEYS:
+ router.add(key, values)
+
+ # build hash table with freq. distribution
+ # key: bandwidth
+# value: number of routers observed with that bandwidth
+ bw_dist = {}
+ for router in routers:
+ if bw_dist.has_key(router.bandwidth):
+ bw_dist[router.bandwidth] += 1
+ else:
+ bw_dist[router.bandwidth] = 1
+
+ if len(routers) <= 0:
+ print "Error: amount of routers must be > 0."
+ return;
+
+ print "calculating entropy"
+ entropy = 0.0
+ for bw in bw_dist.iterkeys():
+ # p = probability of one particular bandwidth
+ p = float(bw_dist[bw]) / len(routers)
+ entropy += -(p * math.log(p, 2))
+
+ return ",".join([valid_after, str(entropy)])
+
+
+if __name__ == "__main__":
+ with open(sys.argv[2], 'w') as f:
+ for file_name in os.listdir(sys.argv[1]):
+ string = run(os.path.join(sys.argv[1], file_name))
+ f.write("%s\n" % (string))
diff --git a/task-6232/run-pyentropy.py b/task-6232/run-pyentropy.py
new file mode 100755
index 0000000..a94a7d6
--- /dev/null
+++ b/task-6232/run-pyentropy.py
@@ -0,0 +1,5 @@
+#!/bin/bash
+#### Uncomment to use most recent data instead of extracted tarballs
+###rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
+python pyentropy.py in/consensuses/ entropy.csv
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits