[tor-commits] [metrics-web/master] Add table with top-10 countries to users.html.

commit d2078417458e63ded2bdbb28e061d3b28605f95d
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Thu Jul 28 21:37:49 2011 -0400

    Add table with top-10 countries to users.html.
    Implements #3624.
 rserve/rserve-init.R                               |    2 +
 rserve/tables.R                                    |   32 ++++++
 .../ernie/web/GraphsSubpagesServlet.java           |   91 +++++++++++++++-
 src/org/torproject/ernie/web/RObjectGenerator.java |   70 +++++++++++-
 .../ernie/web/TableParameterChecker.java           |  117 ++++++++++++++++++++
 web/WEB-INF/users.jsp                              |   30 +++++
 6 files changed, 338 insertions(+), 4 deletions(-)

diff --git a/rserve/rserve-init.R b/rserve/rserve-init.R
index bafb5ba..8344705 100644
--- a/rserve/rserve-init.R
+++ b/rserve/rserve-init.R
@@ -15,3 +15,5 @@ dbpassword= ""
diff --git a/rserve/tables.R b/rserve/tables.R
new file mode 100644
index 0000000..725bc31
--- /dev/null
+++ b/rserve/tables.R
@@ -0,0 +1,32 @@
+# Apply countryname() to every element of the given vector of countries
+# and return the results as produced by sapply().
+# NOTE(review): countryname() is defined outside this diff; presumably it
+# maps a two-letter country code to a display name -- confirm.
+countrynames <- function(countries) {
+  sapply(countries, countryname)
+# Write a CSV file to 'path' containing the (at most) ten countries with
+# the highest mean number of daily direct users between 'start' and 'end'
+# (both dates given as 'yyyy-mm-dd' strings).  Output columns are: cc
+# (country code), country (country name), abs (rounded mean daily users),
+# and rel (percentage of the total, rounded to two decimal places).
+write_direct_users <- function(start, end, path) {
+  drv <- dbDriver("PostgreSQL")
+  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
+  # The last condition leaves out the most recent dates in user_stats.
+  q <- paste("SELECT date, country, r, bwp, brn, bwn, brp, bwr, brr ",
+      "FROM user_stats WHERE date >= '", start, "' AND date <= '", end,
+      "' AND date < (SELECT MAX(date) FROM user_stats) - 1 ",
+      "ORDER BY date, country", sep = "")
+  rs <- dbSendQuery(con, q)
+  u <- fetch(rs, n = -1)
+  dbDisconnect(con)
+  dbUnloadDriver(drv)
+  d <- data.frame(date = u$date, country = u$country,
+       directusers = floor(u$r * (u$bwp * u$brn / u$bwn - u$brp) /
+               (u$bwr * u$brn / u$bwn - u$brr) / 10))
+  # Average the daily estimates per country over the requested interval.
+  d <- aggregate(d$directusers, by = list(country = d$country), mean)
+  # The row with country code "zy" is used as the denominator for the
+  # relative numbers and is not listed as a country itself.
+  total <- d[d$country == "zy", "x"]
+  d <- d[d$country != "zy", ]
+  d <- data.frame(country = d$country, directusers = d$x)
+  d <- d[order(d$directusers, decreasing = TRUE), ]
+  # Keep at most ten rows.  Unlike d[1:10, ], head() does not pad the
+  # result with all-NA rows when fewer than ten countries are available.
+  d <- head(d, 10)
+  d <- data.frame(
+    cc = as.character(d$country),
+    country = sub('the ', '', countrynames(as.character(d$country))),
+    abs = round(d$directusers),
+    rel = round(100 * d$directusers / total, 2))
+  write.csv(d, path, quote = FALSE, row.names = FALSE)
diff --git a/src/org/torproject/ernie/web/GraphsSubpagesServlet.java b/src/org/torproject/ernie/web/GraphsSubpagesServlet.java
index f8dfba4..e3845cf 100644
--- a/src/org/torproject/ernie/web/GraphsSubpagesServlet.java
+++ b/src/org/torproject/ernie/web/GraphsSubpagesServlet.java
@@ -12,9 +12,15 @@ public class GraphsSubpagesServlet extends HttpServlet {
    * are forwarded. */
   private Map<String, String> availableGraphsSubpages;
+  /* Available tables on graphs subpages. */
+  private Map<String, Set<String>> availableGraphsSubpageTables;
   /* Country codes and names for per-country graphs. */
   private List<String[]> knownCountries;
+  /* R object generator for generating table data. */
+  private RObjectGenerator rObjectGenerator;
   public GraphsSubpagesServlet() {
     this.availableGraphsSubpages = new HashMap<String, String>();
@@ -25,9 +31,21 @@ public class GraphsSubpagesServlet extends HttpServlet {
+    this.availableGraphsSubpageTables =
+        new HashMap<String, Set<String>>();
+    this.availableGraphsSubpageTables.put("users.html",
+        new HashSet<String>(Arrays.asList("direct-users".split(","))));
     this.knownCountries = Countries.getInstance().getCountryList();
+  public void init() {
+    /* Get a reference to the R object generator that we need to generate
+     * table data. */
+    this.rObjectGenerator = (RObjectGenerator) getServletContext().
+        getAttribute("RObjectGenerator");
+  }
   public void doGet(HttpServletRequest request,
       HttpServletResponse response) throws IOException, ServletException {
@@ -48,8 +66,9 @@ public class GraphsSubpagesServlet extends HttpServlet {
     String jsp = availableGraphsSubpages.get(requestedPage);
-    /* Find out which graph type was requested, if any. */
+    /* Find out which graph or table type was requested, if any. */
     String requestedGraph = request.getParameter("graph");
+    String requestedTable = request.getParameter("table");
     if (requestedGraph != null) {
       /* Check if the passed parameters are valid. */
@@ -74,6 +93,76 @@ public class GraphsSubpagesServlet extends HttpServlet {
+    if (requestedTable != null) {
+      /* Check if the passed parameters are valid. */
+      Map<String, String[]> checkedParameters = TableParameterChecker.
+          getInstance().checkParameters(requestedTable,
+          request.getParameterMap());
+      if (checkedParameters != null) {
+        /* Set the table's attributes to the appropriate values, so that
+         * we can prepopulate the form. */
+        for (Map.Entry<String, String[]> param :
+            checkedParameters.entrySet()) {
+          request.setAttribute(requestedTable.replaceAll("-", "_") + "_"
+              + param.getKey(), param.getValue());
+        }
+      }
+    }
+    /* Trigger generation of table data if the graphs subpage has any
+     * tables, regardless of whether a table update was requested. */
+    if (this.availableGraphsSubpageTables.containsKey(requestedPage)) {
+      for (String tableName :
+          this.availableGraphsSubpageTables.get(requestedPage)) {
+        /* Use the request parameters only for the table that was actually
+         * requested. */
+        Map<String, String[]> checkedParameters = null;
+        if (tableName.equals(requestedTable)) {
+          checkedParameters = TableParameterChecker.
+              getInstance().checkParameters(requestedTable,
+              request.getParameterMap());
+        }
+        /* Fall back to default parameters if this table was not requested
+         * or if the request parameters were rejected; checkParameters
+         * returns null in the latter case, which would otherwise lead to
+         * a NullPointerException below. */
+        if (checkedParameters == null) {
+          checkedParameters = TableParameterChecker.
+              getInstance().checkParameters(tableName, null);
+        }
+        if (checkedParameters == null) {
+          /* The parameter checker doesn't know this table type, so there
+           * is nothing we could generate. */
+          continue;
+        }
+        /* Prepare filename and R query string.  Single-valued parameters
+         * are passed as name = 'value', multi-valued parameters as
+         * name = c('value1', 'value2', ...). */
+        StringBuilder rQueryBuilder = new StringBuilder("write_"
+            + tableName.replaceAll("-", "_") + "("),
+            tableFilenameBuilder = new StringBuilder(tableName);
+        for (Map.Entry<String, String[]> parameter :
+            checkedParameters.entrySet()) {
+          String parameterName = parameter.getKey();
+          String[] parameterValues = parameter.getValue();
+          for (String param : parameterValues) {
+            tableFilenameBuilder.append("-" + param);
+          }
+          if (parameterValues.length < 2) {
+            rQueryBuilder.append(parameterName + " = '"
+                + parameterValues[0] + "', ");
+          } else {
+            rQueryBuilder.append(parameterName + " = c(");
+            for (int i = 0; i < parameterValues.length - 1; i++) {
+              rQueryBuilder.append("'" + parameterValues[i] + "', ");
+            }
+            rQueryBuilder.append("'" + parameterValues[
+                parameterValues.length - 1] + "'), ");
+          }
+        }
+        tableFilenameBuilder.append(".tbl");
+        String tableFilename = tableFilenameBuilder.toString();
+        /* The %s placeholder is replaced with the absolute file path by
+         * the R object generator. */
+        rQueryBuilder.append("path = '%s')");
+        String rQuery = rQueryBuilder.toString();
+        /* Generate table data and add it as request attribute. */
+        List<Map<String, String>> tableData = rObjectGenerator.
+            generateTable(rQuery, tableFilename);
+        request.setAttribute(tableName.replaceAll("-", "_")
+              + "_tabledata", tableData);
+      }
+    }
     /* Pass list of known countries in case we want to display them. */
     request.setAttribute("countries", this.knownCountries);
diff --git a/src/org/torproject/ernie/web/RObjectGenerator.java b/src/org/torproject/ernie/web/RObjectGenerator.java
index 7f152dd..10aa3b8 100644
--- a/src/org/torproject/ernie/web/RObjectGenerator.java
+++ b/src/org/torproject/ernie/web/RObjectGenerator.java
@@ -55,9 +55,10 @@ public class RObjectGenerator implements ServletContextListener {
     if (!imageFile.exists() || imageFile.lastModified() < now
         - this.maxCacheAge * 1000L) {
-      /* We do. Update the R query to contain the absolute path to the file
-       * to be generated, create a connection to Rserve, run the R query,
-       * and close the connection. The generated graph will be on disk. */
+      /* We do. Update the R query to contain the absolute path to the
+       * file to be generated, create a connection to Rserve, run the R
+       * query, and close the connection. The generated graph will be on
+       * disk. */
       rQuery = String.format(rQuery, imageFile.getAbsolutePath());
       try {
         RConnection rc = new RConnection(rserveHost, rservePort);
@@ -135,5 +136,68 @@ public class RObjectGenerator implements ServletContextListener {
     /* Return the csv file. */
     return result;
+  /* Generate table data using the given R query and filename or read
+   * previously generated table data from disk if it's not too old and
+   * return table data.  The R query must contain a %s placeholder that
+   * is replaced with the absolute path of the table file.  Returns one
+   * map per table row, mapping column headers to cell values, or null if
+   * the table could not be generated or read. */
+  public List<Map<String, String>> generateTable(String rQuery,
+      String tableFilename) {
+    /* See if we need to generate this table. */
+    File tableFile = new File(this.cachedGraphsDirectory + "/"
+        + tableFilename);
+    long now = System.currentTimeMillis();
+    if (!tableFile.exists() || tableFile.lastModified() < now
+        - this.maxCacheAge * 1000L) {
+      /* We do. Update the R query to contain the absolute path to the
+       * file to be generated, create a connection to Rserve, run the R
+       * query, and close the connection. The generated csv file will be
+       * on disk in the same directory as the generated graphs. */
+      rQuery = String.format(rQuery, tableFile.getAbsolutePath());
+      try {
+        RConnection rc = new RConnection(rserveHost, rservePort);
+        try {
+          rc.eval(rQuery);
+        } finally {
+          /* Close the connection even if evaluating the query failed, so
+           * that we don't leak Rserve connections. */
+          rc.close();
+        }
+      } catch (RserveException e) {
+        return null;
+      }
+      /* Check that we really just generated the file */
+      if (!tableFile.exists() || tableFile.lastModified() < now
+          - this.maxCacheAge * 1000L) {
+        return null;
+      }
+    }
+    /* Read the text file from disk and write the table content to a
+     * list of maps, one map per row. The first line contains the column
+     * headers. */
+    List<Map<String, String>> result =
+        new ArrayList<Map<String, String>>();
+    BufferedReader br = null;
+    try {
+      br = new BufferedReader(new FileReader(tableFile));
+      String line = br.readLine();
+      if (line != null) {
+        List<String> headers = new ArrayList<String>(Arrays.asList(
+            line.split(",")));
+        while ((line = br.readLine()) != null) {
+          String[] parts = line.split(",");
+          /* Give up on the whole table if a row doesn't match the
+           * header. */
+          if (headers.size() != parts.length) {
+            return null;
+          }
+          Map<String, String> row = new HashMap<String, String>();
+          for (int i = 0; i < headers.size(); i++) {
+            row.put(headers.get(i), parts[i]);
+          }
+          result.add(row);
+        }
+      }
+    } catch (IOException e) {
+      return null;
+    } finally {
+      /* Always release the file handle, also on the early returns
+       * above. */
+      if (br != null) {
+        try {
+          br.close();
+        } catch (IOException ignored) {
+          /* Nothing useful we can do if closing fails. */
+        }
+      }
+    }
+    /* Return table values. */
+    return result;
+  }
diff --git a/src/org/torproject/ernie/web/TableParameterChecker.java b/src/org/torproject/ernie/web/TableParameterChecker.java
new file mode 100644
index 0000000..9bf1c33
--- /dev/null
+++ b/src/org/torproject/ernie/web/TableParameterChecker.java
@@ -0,0 +1,117 @@
+package org.torproject.ernie.web;
+import java.text.*;
+import java.util.*;
+import java.util.regex.*;
+ * Checks request parameters passed to generate tables.
+ */
+public class TableParameterChecker {
+  /**
+   * Singleton instance of this class, created once when the class is
+   * initialized.
+   */
+  private static TableParameterChecker instance =
+      new TableParameterChecker();
+  /**
+   * Returns the singleton instance of this class. Every call returns the
+   * same object.
+   */
+  public static TableParameterChecker getInstance() {
+    return instance;
+  }
+  /* Date format for parsing start and end dates. */
+  private SimpleDateFormat dateFormat;
+  /* Available tables with corresponding parameter lists. */
+  private Map<String, String> availableTables;
+  /* Known parameters and parameter values. */
+  private Map<String, String> knownParameterValues;
+  /**
+   * Initializes the date format (yyyy-MM-dd, UTC) and the map with valid
+   * parameters for each of the tables. The parameter list of a table is
+   * stored as a single comma-separated string. The map of known
+   * parameter values is created empty.
+   */
+  public TableParameterChecker() {
+    this.dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    this.dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    this.availableTables = new HashMap<String, String>();
+    this.availableTables.put("direct-users", "start,end,filename");
+    this.knownParameterValues = new HashMap<String, String>();
+  }
+  /**
+   * Checks request parameters for the given table type and returns a map
+   * of recognized parameters, or null if the table type doesn't exist or
+   * the parameters are invalid.
+   *
+   * If the table type supports start and end dates, the returned map
+   * always contains both "start" and "end", each as a single-element
+   * array holding a date formatted as yyyy-MM-dd. A missing end date
+   * defaults to today, a missing start date to 90 days before the end
+   * date.
+   *
+   * @param tableType the requested table type, may be null
+   * @param requestParameters request parameter map with String keys and
+   *     String[] values, or null to use default values only
+   */
+  public Map<String, String[]> checkParameters(String tableType,
+      Map requestParameters) {
+    /* Check if the table type exists. */
+    if (tableType == null ||
+        !this.availableTables.containsKey(tableType)) {
+      return null;
+    }
+    /* Find out which other parameters are supported by this table type
+     * and parse them if they are given. */
+    Set<String> supportedTableParameters = new HashSet<String>(Arrays.
+        asList(this.availableTables.get(tableType).split(",")));
+    Map<String, String[]> recognizedTableParameters =
+        new HashMap<String, String[]>();
+    /* Parse start and end dates if supported by the table type. If no end
+     * date is provided, set it to today. If no start date is provided,
+     * set it to 90 days before the end date. Make sure that start date
+     * precedes end date. */
+    if (supportedTableParameters.contains("start") ||
+        supportedTableParameters.contains("end")) {
+      /* SimpleDateFormat is not thread-safe, and this singleton is used
+       * by concurrently running servlet threads. Use a private instance
+       * per call that is configured like the shared one. */
+      SimpleDateFormat localDateFormat =
+          new SimpleDateFormat("yyyy-MM-dd");
+      localDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+      String[] startParameter = null;
+      String[] endParameter = null;
+      if (requestParameters != null) {
+        startParameter = (String[]) requestParameters.get("start");
+        endParameter = (String[]) requestParameters.get("end");
+      }
+      long endTimestamp;
+      long startTimestamp;
+      try {
+        endTimestamp = parseDateParameter(localDateFormat, endParameter,
+            System.currentTimeMillis());
+        startTimestamp = parseDateParameter(localDateFormat,
+            startParameter,
+            endTimestamp - 90L * 24L * 60L * 60L * 1000L);
+      } catch (ParseException e) {
+        return null;
+      }
+      if (startTimestamp > endTimestamp) {
+        return null;
+      }
+      /* Re-format the parsed dates rather than passing on the original
+       * strings, so that callers only ever see normalized dates. */
+      recognizedTableParameters.put("start", new String[] {
+          localDateFormat.format(new Date(startTimestamp)) });
+      recognizedTableParameters.put("end", new String[] {
+          localDateFormat.format(new Date(endTimestamp)) });
+    }
+    /* We now have a map with all required table parameters. Return it. */
+    return recognizedTableParameters;
+  }
+  /* Parse the first value of the given date parameter and return it as
+   * milliseconds since the epoch. Return the default timestamp if the
+   * parameter is missing or empty. Throw a ParseException if the value
+   * cannot be parsed or does not start with "20". */
+  private static long parseDateParameter(SimpleDateFormat format,
+      String[] parameter, long defaultTimestamp) throws ParseException {
+    if (parameter == null || parameter.length < 1 ||
+        parameter[0].length() < 1) {
+      return defaultTimestamp;
+    }
+    long timestamp = format.parse(parameter[0]).getTime();
+    if (!parameter[0].startsWith("20")) {
+      throw new ParseException("Date does not start with \"20\": "
+          + parameter[0], 0);
+    }
+    return timestamp;
+  }
diff --git a/web/WEB-INF/users.jsp b/web/WEB-INF/users.jsp
index 814c15c..b378af1 100644
--- a/web/WEB-INF/users.jsp
+++ b/web/WEB-INF/users.jsp
@@ -1,5 +1,6 @@
 <%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"; %>
 <%@ taglib prefix="fn" uri="http://java.sun.com/jsp/jstl/functions"; %>
+<%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt"; %>
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
@@ -58,6 +59,35 @@ based on the requests seen by a few dozen directory mirrors.</p>
+<a name="direct-users-table"></a>
+  <tr>
+    <th>Country</th>
+    <th>Mean daily users</th>
+  </tr>
+  <c:forEach var="row" items="${direct_users_tabledata}">
+    <tr>
+      <td><a href="users.html?graph=direct-users&country=${row['cc']}#direct-users">${row['country']}</a>&emsp;</td>
+      <td>${row['abs']} (<fmt:formatNumber type="number" minFractionDigits="2" value="${row['rel']}" /> %)</td>
+    </tr>
+  </c:forEach>
+<form action="users.html#direct-users-table">
+  <div class="formrow">
+    <input type="hidden" name="table" value="direct-users">
+    <p>
+    <label>Start date (yyyy-mm-dd):</label>
+      <input type="text" name="start" size="10"
+             value="<c:choose><c:when test="${fn:length(direct_users_start) == 0}">${default_start_date}</c:when><c:otherwise>${direct_users_start[0]}</c:otherwise></c:choose>">
+    <label>End date (yyyy-mm-dd):</label>
+      <input type="text" name="end" size="10"
+             value="<c:choose><c:when test="${fn:length(direct_users_end) == 0}">${default_end_date}</c:when><c:otherwise>${direct_users_end[0]}</c:otherwise></c:choose>">
+    </p><p>
+    <input class="submit" type="submit" value="Update table">
+    </p>
+  </div>
 <p><a href="csv/direct-users.csv">CSV</a> file containing all data.</p>
 <p><a href="csv/monthly-users-peak.csv">CSV</a> file containing peak daily
 Tor users (direct and bridge) per month by country.</p>

