[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [metrics/master] Add script to parse entry stats.
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed, 1 Jul 2009 21:04:55 +0200
Subject: Add script to parse entry stats.
Commit: 74017d403859e6cca46fa6f553df713ae9b45c2a
---
HOWTO | 19 +++
out/entrystats/statsyrtne.csv | 10 ++
.../torproject/metrics/entry/ParseEntryStats.java | 146 ++++++++++++++++++++
3 files changed, 175 insertions(+), 0 deletions(-)
create mode 100644 out/entrystats/statsyrtne.csv
create mode 100644 src/org/torproject/metrics/entry/ParseEntryStats.java
diff --git a/HOWTO b/HOWTO
index b817eb2..002285a 100644
--- a/HOWTO
+++ b/HOWTO
@@ -240,3 +240,22 @@ $ java -cp bin/:lib/*
out/performance/
+5 Entry-guard statistics
+=========================
+
+Put the entry-stats files in a directory data/entrystats/, giving them
+arbitrary filenames to identify the routers later on. Router nicknames are
+probably a fine choice.
+
+$ mkdir data/
+$ mkdir data/entrystats/
+
+Compile the parsing app:
+
+$ javac -d bin/ -cp src/:lib/* src/org/torproject/metrics/entry/*.java
+
+Run the parsing script:
+
+$ java -cp bin/:lib/* org.torproject.metrics.entry.ParseEntryStats
+ data/entrystats/ out/entrystats/
+
diff --git a/out/entrystats/statsyrtne.csv b/out/entrystats/statsyrtne.csv
new file mode 100644
index 0000000..8007b75
--- /dev/null
+++ b/out/entrystats/statsyrtne.csv
@@ -0,0 +1,10 @@
+time,ipsad,ipsae,ipsai,ipsal,ipsam,ipsar,ipsat,ipsau,ipsaz,ipsba,ipsbd,ipsbe,ipsbg,ipsbh,ipsbr,ipsbs,ipsby,ipsca,ipsch,ipscl,ipscn,ipsco,ipscr,ipscs,ipscy,ipscz,ipsde,ipsdk,ipsdo,ipsec,ipsee,ipseg,ipses,ipsfi,ipsfr,ipsgb,ipsgd,ipsge,ipsgr,ipsgt,ipsgu,ipsgy,ipshk,ipshn,ipshr,ipshu,ipsid,ipsie,ipsil,ipsin,ipsiq,ipsir,ipsis,ipsit,ipsjm,ipsjo,ipsjp,ipske,ipskg,ipskh,ipskr,ipskw,ipskz,ipslb,ipslk,ipslt,ipslu,ipslv,ipsmc,ipsmd,ipsmk,ipsmo,ipsmq,ipsmv,ipsmx,ipsmy,ipsng,ipsni,ipsnl,ipsno,ipsnp,ipsnz,ipsom,ipspa,ipspe,ipsph,ipspk,ipspl,ipspr,ipsps,ipspt,ipsqa,ipsro,ipsrs,ipsru,ipssa,ipsse,ipssg,ipssi,ipssk,ipssv,ipssy,ipsth,ipstr,ipstt,ipstw,ipsua,ipsug,ipsus,ipsuy,ipsve,ipsvn,ipsye,ipszw,ipstotal
+2009-06-17,4,12,0,0,4,4,12,12,0,4,4,4,4,4,28,4,0,20,4,4,244,4,4,4,0,4,124,4,0,0,4,0,12,4,44,36,0,4,12,4,4,0,4,0,4,4,12,4,4,28,4,84,0,28,0,4,36,0,0,0,28,12,4,4,4,4,4,0,0,4,0,0,0,4,4,4,4,0,12,0,12,0,4,4,4,28,4,28,4,4,4,4,4,0,36,20,12,4,4,4,0,4,12,20,4,12,12,4,140,0,4,20,4,0,1368
+2009-06-18,0,4,4,0,4,4,12,12,4,4,0,4,4,4,28,4,4,20,4,4,236,4,0,4,4,4,228,4,4,0,0,4,20,4,60,52,0,4,4,0,0,4,12,4,4,4,20,4,12,44,4,92,0,44,0,4,44,0,0,0,28,12,4,4,0,4,4,4,0,4,4,0,0,0,4,12,0,0,28,4,4,4,4,4,4,20,4,44,0,4,12,4,12,4,36,20,28,4,4,4,0,20,12,12,0,20,12,0,220,0,4,4,0,4,1700
+2009-06-19,0,4,0,0,4,12,20,12,4,4,4,4,4,4,28,0,0,20,4,4,172,4,4,4,4,12,260,4,4,0,4,0,20,12,68,44,0,4,4,0,0,0,4,0,4,12,12,4,12,20,4,60,0,36,4,4,20,0,0,0,28,4,4,4,0,4,4,4,0,4,4,4,4,0,12,12,4,0,28,4,4,0,0,4,4,12,4,36,0,4,4,4,12,4,52,20,44,4,0,4,0,4,12,28,0,4,12,0,292,0,4,12,4,0,1668
+2009-06-20,0,4,4,4,0,12,12,4,4,4,0,4,4,4,20,0,0,20,4,4,204,4,4,4,4,12,268,4,0,0,4,0,20,20,68,44,0,0,12,0,0,0,4,4,4,4,4,4,12,20,4,68,0,36,4,12,20,4,0,4,12,4,0,4,4,4,4,4,0,0,0,0,0,0,4,12,0,0,28,12,4,4,0,4,0,12,4,28,0,4,12,0,4,0,52,20,44,4,4,4,0,4,4,20,4,4,12,0,268,4,0,12,0,0,1616
+2009-06-21,0,4,0,0,4,4,20,4,4,0,0,4,4,4,12,0,4,20,4,4,164,4,4,4,4,4,252,4,4,0,4,4,20,4,68,44,0,0,4,0,4,0,4,0,4,4,4,4,12,12,4,84,0,44,4,4,20,0,0,0,4,12,0,0,0,4,4,4,0,4,0,0,0,0,4,4,0,4,36,4,4,0,4,4,4,4,4,28,0,4,4,4,4,4,44,20,44,4,4,4,0,4,4,12,0,12,12,0,284,0,4,4,4,0,1512
+2009-06-22,0,12,0,0,0,4,28,4,4,4,4,4,4,4,12,0,4,20,20,4,164,0,0,4,0,12,308,4,0,4,0,4,20,20,92,36,0,0,4,0,0,0,4,0,4,4,12,0,20,20,4,68,0,44,4,4,20,0,0,0,20,12,0,4,4,4,4,4,0,4,0,0,0,0,4,12,0,0,36,4,4,0,0,4,4,12,4,28,0,0,4,4,12,4,44,20,68,4,0,4,0,4,4,12,0,12,12,0,332,0,4,12,0,4,1740
+2009-06-23,0,4,0,0,4,4,20,12,4,4,4,4,4,0,12,0,4,20,12,4,164,0,4,4,0,4,292,4,4,0,4,4,12,12,76,36,0,4,4,0,0,0,4,4,4,12,4,4,12,20,4,68,0,44,0,4,20,0,0,0,20,4,4,4,4,4,4,4,0,4,0,0,0,0,4,12,0,0,20,4,4,0,0,4,4,12,4,28,0,0,12,4,4,0,44,20,52,4,0,4,4,4,4,20,0,4,20,0,300,0,4,12,4,0,1616
+2009-06-24,0,4,0,0,0,4,20,12,0,4,0,4,4,4,20,0,4,28,12,4,156,4,4,4,4,12,260,4,0,0,4,0,20,12,76,44,4,4,4,0,0,0,4,0,4,12,12,4,12,28,0,84,0,44,0,4,20,0,0,0,20,12,4,4,0,4,4,4,4,4,0,0,0,4,12,4,0,0,28,4,4,0,0,4,4,4,4,28,0,4,12,4,4,4,44,20,52,4,0,0,0,4,4,20,4,4,4,0,300,0,4,12,0,0,1632
+2009-06-25,0,4,0,0,0,4,28,4,0,4,0,0,4,4,20,0,4,20,12,4,212,4,4,4,4,12,276,4,0,0,4,0,12,12,68,36,0,0,4,0,0,0,12,0,4,4,12,4,12,28,4,92,4,52,0,4,20,0,4,0,20,12,4,4,0,4,4,4,0,0,0,4,0,4,4,12,0,0,28,4,4,4,4,4,4,12,4,36,0,0,12,0,4,0,60,20,52,4,0,4,0,4,4,28,4,12,12,0,316,0,4,12,4,0,1760
diff --git a/src/org/torproject/metrics/entry/ParseEntryStats.java b/src/org/torproject/metrics/entry/ParseEntryStats.java
new file mode 100644
index 0000000..0cb0669
--- /dev/null
+++ b/src/org/torproject/metrics/entry/ParseEntryStats.java
@@ -0,0 +1,254 @@
+/* Copyright 2009 Karsten Loesing
+ * See LICENSE for licensing information */
+package org.torproject.metrics.entry;
+
+import java.io.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * Command-line tool that reads entry-stats files from an input directory
+ * and writes one CSV file per input file to an output directory. Every
+ * CSV file has one row per date and one "ips" column per country code
+ * seen in any of the input files, followed by an "ipstotal" column.
+ */
+public final class ParseEntryStats {
+
+  /**
+   * Amount subtracted from every reported per-country count.
+   * NOTE(review): presumably reported counts are rounded up to a multiple
+   * of 8, so that subtracting 4 yields the middle of the bin -- confirm
+   * against the entry-stats format.
+   */
+  private static final int COUNT_ADJUSTMENT = 4;
+
+  /** Values parsed for one "written" interval of an input file. */
+  private static class DataPoint {
+    /** Second space-separated token of the "written" line. */
+    String date;
+    /**
+     * Adjusted count per country code. Starts out empty instead of null,
+     * so that a stored interval without an "ips" line is written as zeros
+     * rather than failing with a NullPointerException.
+     */
+    SortedMap<String, Integer> ips = new TreeMap<String, Integer>();
+  }
+
+  /** Country codes seen in any input file; they define the CSV columns. */
+  private static SortedSet<String> allCountries = new TreeSet<String>();
+
+  /** Dates seen in any input file; they define the CSV rows. */
+  private static SortedSet<String> allDates = new TreeSet<String>();
+
+  /** Parsed data points, keyed by input file name and then by date. */
+  private static SortedMap<String, SortedMap<String, DataPoint>> allDataPoints
+      = new TreeMap<String, SortedMap<String, DataPoint>>();
+
+  /**
+   * Parses the second space-separated token of a line as a comma-separated
+   * list of "country=count" entries and registers every country found in
+   * {@code allCountries}. Entries without "=" or with a non-numeric count
+   * are reported on stderr and skipped instead of aborting the whole run.
+   *
+   * @param line the complete input line, e.g. "ips de=16,us=8"
+   * @return adjusted counts by country; empty if the line has no second
+   *     token
+   */
+  private static SortedMap<String, Integer> parseCountryLine(String line) {
+    SortedMap<String, Integer> result = new TreeMap<String, Integer>();
+    String[] tokens = line.split(" ");
+    if (tokens.length < 2) {
+      return result;
+    }
+    for (String part : tokens[1].split(",")) {
+      String[] keyValue = part.split("=");
+      if (keyValue.length < 2) {
+        System.err.println("Skipping malformed country entry '" + part
+            + "'.");
+        continue;
+      }
+      try {
+        int count = Integer.parseInt(keyValue[1]) - COUNT_ADJUSTMENT;
+        allCountries.add(keyValue[0]);
+        result.put(keyValue[0], count);
+      } catch (NumberFormatException e) {
+        System.err.println("Skipping malformed country entry '" + part
+            + "'.");
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Reads one entry-stats file. A "written " line starts a new data point
+   * whose date is the line's second token; an "ips " line fills in the
+   * counts of the current data point; all other lines are ignored. Data
+   * points are only stored once the file has contained an "ips " line with
+   * actual numbers, so leading intervals without data end up as NA rows.
+   *
+   * @param inputFile the file to read
+   * @return the stored data points of this file, keyed by date
+   * @throws IOException if the file cannot be read
+   */
+  private static SortedMap<String, DataPoint> parseInputFile(File inputFile)
+      throws IOException {
+    SortedMap<String, DataPoint> dataPoints
+        = new TreeMap<String, DataPoint>();
+    BufferedReader br = new BufferedReader(new FileReader(inputFile));
+    try {
+      String line;
+      DataPoint current = null;
+      boolean haveSeenActualNumbers = false;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("written ")) {
+          if (haveSeenActualNumbers && current != null) {
+            dataPoints.put(current.date, current);
+          }
+          String[] tokens = line.split(" ");
+          if (tokens.length < 2) {
+            // No date on this line: drop the interval instead of crashing.
+            current = null;
+            continue;
+          }
+          current = new DataPoint();
+          current.date = tokens[1];
+          allDates.add(current.date);
+        } else if (line.startsWith("ips ") && current != null) {
+          // An "ips " line before the first "written " line has no data
+          // point to belong to and is skipped.
+          current.ips = parseCountryLine(line);
+          if (line.split(" ").length > 1) {
+            haveSeenActualNumbers = true;
+          }
+        }
+      }
+      if (haveSeenActualNumbers && current != null) {
+        dataPoints.put(current.date, current);
+      }
+    } finally {
+      // Close the reader even if reading or parsing fails.
+      br.close();
+    }
+    return dataPoints;
+  }
+
+  /**
+   * Writes the CSV file for one input file: a header row followed by one
+   * row per date in {@code allDates}. Dates without a stored data point
+   * get "NA" in every column; countries missing from a data point count
+   * as 0. All rows end in "\n" regardless of platform.
+   *
+   * @param outFile the CSV file to create or overwrite
+   * @param dataPoints the data points of one input file, keyed by date
+   * @throws IOException if the file cannot be written
+   */
+  private static void writeCsv(File outFile,
+      SortedMap<String, DataPoint> dataPoints) throws IOException {
+    BufferedWriter out = new BufferedWriter(new FileWriter(outFile, false));
+    try {
+      out.write("time,");
+      for (String country : allCountries) {
+        out.write("ips" + country + ",");
+      }
+      out.write("ipstotal\n");
+      for (String date : allDates) {
+        out.write(date + ",");
+        DataPoint dataPoint = dataPoints.get(date);
+        if (dataPoint == null) {
+          for (int i = 0; i < allCountries.size(); i++) {
+            out.write("NA,");
+          }
+          out.write("NA\n");
+          continue;
+        }
+        int ipstotal = 0;
+        for (String country : allCountries) {
+          Integer ips = dataPoint.ips.get(country);
+          int count = ips == null ? 0 : ips;
+          ipstotal += count;
+          out.write(count + ",");
+        }
+        out.write(ipstotal + "\n");
+      }
+    } finally {
+      // Close the writer even if writing fails.
+      out.close();
+    }
+  }
+
+  /** Not instantiable; all functionality is in static methods. */
+  private ParseEntryStats() {
+  }
+
+  /**
+   * Parses all regular files in the input directory and writes one CSV
+   * file named after each of them to the output directory.
+   *
+   * @param args input directory and output directory, in that order
+   * @throws Exception if reading or writing a file fails
+   */
+  public static void main(final String[] args) throws Exception {
+
+    // check input parameters
+    if (args.length < 2) {
+      System.err.println("Usage: java "
+          + ParseEntryStats.class.getSimpleName()
+          + " <input directory> <output directory>");
+      System.exit(1);
+    }
+    File inputDirectory = new File(args[0]);
+    if (!inputDirectory.isDirectory()) {
+      System.err.println("Input directory '"
+          + inputDirectory.getAbsolutePath()
+          + "' does not exist or is not a directory.");
+      System.exit(1);
+    }
+    File outputDirectory = new File(args[1]);
+    if (outputDirectory.exists() && !outputDirectory.isDirectory()) {
+      System.err.println("Output directory '"
+          + outputDirectory.getAbsolutePath()
+          + "' exists, but is not a directory.");
+      System.exit(1);
+    }
+    // mkdirs() also creates missing parents; its result is checked so
+    // that a failure is reported here and not on the first write.
+    if (!outputDirectory.isDirectory() && !outputDirectory.mkdirs()) {
+      System.err.println("Could not create output directory '"
+          + outputDirectory.getAbsolutePath() + "'.");
+      System.exit(1);
+    }
+
+    long started = System.currentTimeMillis();
+
+    // parse input files
+    File[] inputFiles = inputDirectory.listFiles();
+    if (inputFiles == null) {
+      System.err.println("Could not list input directory '"
+          + inputDirectory.getAbsolutePath() + "'.");
+      System.exit(1);
+    }
+    for (File inputFile : inputFiles) {
+      if (!inputFile.isFile()) {
+        // Subdirectories cannot be opened with a FileReader.
+        continue;
+      }
+      allDataPoints.put(inputFile.getName(), parseInputFile(inputFile));
+    }
+
+    System.out.printf("We have seen %d countries on %d days on %d "
+        + "entry nodes.%n", allCountries.size(), allDates.size(),
+        allDataPoints.size());
+
+    // write one CSV file per input file
+    for (Map.Entry<String, SortedMap<String, DataPoint>> e
+        : allDataPoints.entrySet()) {
+      writeCsv(new File(outputDirectory, e.getKey() + ".csv"),
+          e.getValue());
+    }
+
+    System.out.println("Parsing finished after "
+        + ((System.currentTimeMillis() - started) / 1000)
+        + " seconds.");
+  }
+}
+
--
1.5.6.5