[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] [metrics-web/master] Improve relay search page.
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Sat, 18 Sep 2010 12:23:50 +0200
Subject: Improve relay search page.
Commit: fad5fec418dedcf0a64a4e28e40e5b66169d5ef2
Search in the database instead of in the consensus files on disk (but keep
the on-disk search as a fallback for the moment). This speeds up searching
a lot.
Allow searching by month or day.
Make nickname search case-insensitive.
---
.../torproject/ernie/web/RelaySearchServlet.java | 274 +++++++++++++++++---
1 files changed, 240 insertions(+), 34 deletions(-)
diff --git a/src/org/torproject/ernie/web/RelaySearchServlet.java b/src/org/torproject/ernie/web/RelaySearchServlet.java
index 8f3ed57..91e66a9 100644
--- a/src/org/torproject/ernie/web/RelaySearchServlet.java
+++ b/src/org/torproject/ernie/web/RelaySearchServlet.java
@@ -4,6 +4,7 @@ import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.math.*;
+import java.sql.*;
import java.text.*;
import java.util.*;
import java.util.regex.*;
@@ -11,18 +12,16 @@ import java.util.regex.*;
import org.apache.commons.codec.*;
import org.apache.commons.codec.binary.*;
+import org.torproject.ernie.util.*;
+
/**
* Web page that allows users to search for relays in the descriptor
* archives.
*
- * Possible improvements:
- * - Make nickname search case-insensitive
- * - Instead of searching last 30 days, add date, month, or even year
- * parameter
- * - Make CONSENSUS_DIRECTORY configurable
- *
* Possible search terms for testing:
* - gabelmoo
+ * - gabelmoo 2010-09
+ * - gabelmoo 2010-09-18
* - gabelmoo F2044413DAC2E02E3D6BCF4735A19BCA1DE97281
* - gabelmoo 80.190.246
* - gabelmoo F2044413DAC2E02E3D6BCF4735A19BCA1DE97281 80.190.246
@@ -36,8 +35,8 @@ import org.apache.commons.codec.binary.*;
*/
public class RelaySearchServlet extends HttpServlet {
- private static Pattern alphaNumDotSpacePattern =
- Pattern.compile("[A-Za-z0-9\\. ]+");
+ private static Pattern alphaNumDotDashSpacePattern =
+ Pattern.compile("[A-Za-z0-9\\.\\- ]+");
private static Pattern numPattern = Pattern.compile("[0-9]+");
@@ -46,6 +45,42 @@ public class RelaySearchServlet extends HttpServlet {
private static Pattern alphaNumPattern =
Pattern.compile("[A-Za-z0-9]+");
+ private static SimpleDateFormat dayFormat =
+ new SimpleDateFormat("yyyy-MM-dd");
+
+ private static SimpleDateFormat monthFormat =
+ new SimpleDateFormat("yyyy-MM");
+
+ static {
+ dayFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ monthFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+ }
+
+ private Connection conn = null;
+
+ public RelaySearchServlet() {
+
+ /* Try to load the database driver. */
+ try {
+ Class.forName("org.postgresql.Driver");
+ } catch (ClassNotFoundException e) {
+ /* Don't initialize conn and always reply to all requests with
+ * "500 internal server error". */
+ return;
+ }
+
+ /* Read JDBC URL from property file. */
+ ErnieProperties props = new ErnieProperties();
+ String connectionURL = props.getProperty("jdbc.url");
+
+ /* Try to connect to database. */
+ try {
+ conn = DriverManager.getConnection(connectionURL);
+ } catch (SQLException e) {
+ conn = null;
+ }
+ }
+
private void writeHeader(PrintWriter out) throws IOException {
out.println("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 "
+ "Transitional//EN\"\n"
@@ -139,24 +174,27 @@ public class RelaySearchServlet extends HttpServlet {
PrintWriter out = new PrintWriter(response.getWriter(), true);
writeHeader(out);
- /* Check if we have a consensuses directory. */
- File consensusDirectory = new File(CONSENSUS_DIRECTORY);
+ /* If we don't have a database, see if we have consensus on disk. */
SortedSet<File> consensusDirectories = new TreeSet<File>();
- if (consensusDirectory.exists() && consensusDirectory.isDirectory()) {
- for (File yearFile : consensusDirectory.listFiles()) {
- for (File monthFile : yearFile.listFiles()) {
- consensusDirectories.add(monthFile);
+ if (conn == null) {
+ /* Check if we have a consensuses directory. */
+ File consensusDirectory = new File(CONSENSUS_DIRECTORY);
+ if (consensusDirectory.exists() && consensusDirectory.isDirectory()) {
+ for (File yearFile : consensusDirectory.listFiles()) {
+ for (File monthFile : yearFile.listFiles()) {
+ consensusDirectories.add(monthFile);
+ }
}
}
- }
- if (consensusDirectories.isEmpty()) {
- out.println("<p><font color=\"red\"><b>Warning: </b></font>This "
- + "server doesn't have any relay lists available. If this "
- + "problem persists, please "
- + "<a href=\"mailto:tor-assistants@xxxxxxxxxxxxx\">let us "
- + "know</a>!</p>\n");
- writeFooter(out);
- return;
+ if (consensusDirectories.isEmpty()) {
+ out.println("<p><font color=\"red\"><b>Warning: </b></font>This "
+ + "server doesn't have any relay lists available. If this "
+ + "problem persists, please "
+ + "<a href=\"mailto:tor-assistants@xxxxxxxxxxxxx\">let us "
+ + "know</a>!</p>\n");
+ writeFooter(out);
+ return;
+ }
}
/* Read search parameter, if any. */
@@ -167,11 +205,13 @@ public class RelaySearchServlet extends HttpServlet {
/* Write search form. */
out.print(" <p>Search for a relay in the relay descriptor "
- + "archive by typing (part of) its <b>nickname</b>, "
- + "<b>fingerprint</b>, or <b>IP address</b> in the following "
- + "search field and clicking Search. The search will stop "
- + "after 30 hits or parsing 1 month of descriptors. Note that "
- + "the search can take up to 30 seconds.</p><br/>\n"
+ + "archive by typing (part of) a <b>nickname</b>, "
+ + "<b>fingerprint</b>, or <b>IP address</b> and optionally up "
+ + "to three <b>months (yyyy-mm)</b> or <b>days "
+ + "(yyyy-mm-dd)</b> in the following search field and "
+ + "clicking Search. The search will stop after 30 hits or, "
+ + "unless you provide a month or a day, after parsing the last "
+ + "30 days of relay lists.</p><br/>\n"
+ " <form action=\"relay-search.html\">\n"
+ " <table>\n"
+ " <tr>\n"
@@ -200,11 +240,13 @@ public class RelaySearchServlet extends HttpServlet {
String searchFingerprint = "";
String searchIPAddress = "";
SortedSet<String> searchFingerprintOrNickname = new TreeSet<String>();
+ SortedSet<String> searchDays = new TreeSet<String>();
+ SortedSet<String> searchMonths = new TreeSet<String>();
boolean validQuery = false;
/* Only parse search parameter if it contains nothing else than
* alphanumeric characters, dots, and spaces. */
- if (alphaNumDotSpacePattern.matcher(searchParameter).matches()) {
+ if (alphaNumDotDashSpacePattern.matcher(searchParameter).matches()) {
SortedSet<String> searchTerms = new TreeSet<String>();
if (searchParameter.trim().contains(" ")) {
String[] split = searchParameter.trim().split(" ");
@@ -251,6 +293,27 @@ public class RelaySearchServlet extends HttpServlet {
searchIPAddress = sb.toString().substring(1);
}
+ /* If the search term contains hyphens, it must be a month or a
+ * day. */
+ else if (searchTerm.contains("-") &&
+ searchTerm.startsWith("20")) {
+ try {
+ if (searchTerm.length() == 10) {
+ dayFormat.parse(searchTerm);
+ searchDays.add(searchTerm);
+ } else if (searchTerm.length() == 7) {
+ monthFormat.parse(searchTerm);
+ searchMonths.add(searchTerm);
+ } else {
+ validQuery = false;
+ break;
+ }
+ } catch (ParseException e) {
+ validQuery = false;
+ break;
+ }
+ }
+
/* If the search term contains between 8 and 19 hex characters, it
* could be either a nickname or a fingerprint. */
else if (searchTerm.length() >= 8 && searchTerm.length() <= 19 &&
@@ -313,14 +376,21 @@ public class RelaySearchServlet extends HttpServlet {
searchFingerprintOrNickname.clear();
}
+ /* We only accept at most three months or days, or people could
+ * accidentally keep the database busy. */
+ if (searchDays.size() + searchMonths.size() > 3) {
+ validQuery = false;
+ }
+
/* If the query is invalid, print out a general warning. */
if (!validQuery) {
out.write(" <p>Sorry, I didn't understand your query. "
+ "Please provide a nickname (e.g., \"gabelmoo\"), at least "
+ "the first 8 hex characters of a fingerprint (e.g., "
+ "\"F2044413\"), or at least the first two octets of an IPv4 "
- + "address in dotted-decimal notation (e.g., \"80.190\")."
- + "</p>\n");
+ + "address in dotted-decimal notation (e.g., \"80.190\"). You "
+ + "can also provide at most three months or days in ISO 8601 "
+ + "format (e.g., \"2010-09\" or \"2010-09-17\").</p>\n");
writeFooter(out);
return;
}
@@ -342,21 +412,157 @@ public class RelaySearchServlet extends HttpServlet {
recognizedSearchTerms.add("IP address <b>" + searchIPAddress
+ "</b>");
}
+ List<String> recognizedIntervals = new ArrayList<String>();
+ for (String searchTerm : searchMonths) {
+ recognizedIntervals.add("in <b>" + searchTerm + "</b>");
+ }
+ for (String searchTerm : searchDays) {
+ recognizedIntervals.add("on <b>" + searchTerm + "</b>");
+ }
out.write(" <p>Searching for relays with ");
if (recognizedSearchTerms.size() == 1) {
- out.write(recognizedSearchTerms.get(0) + " ...</p>\n");
+ out.write(recognizedSearchTerms.get(0));
} else if (recognizedSearchTerms.size() == 2) {
out.write(recognizedSearchTerms.get(0) + " and "
- + recognizedSearchTerms.get(1) + " ...</p>\n");
+ + recognizedSearchTerms.get(1));
} else {
for (int i = 0; i < recognizedSearchTerms.size() - 1; i++) {
out.write(recognizedSearchTerms.get(i) + ", ");
}
out.write("and " + recognizedSearchTerms.get(
- recognizedSearchTerms.size() - 1) + " ...</p>\n");
+ recognizedSearchTerms.size() - 1));
+ }
+ if (recognizedIntervals.size() == 1) {
+ out.write(" running " + recognizedIntervals.get(0));
+ } else if (recognizedIntervals.size() == 2) {
+ out.write(" running " + recognizedIntervals.get(0) + " and/or "
+ + recognizedIntervals.get(1));
+ } else if (recognizedIntervals.size() > 2) {
+ out.write(" running ");
+ for (int i = 0; i < recognizedIntervals.size() - 1; i++) {
+ out.write(recognizedIntervals.get(i) + ", ");
+ }
+ out.write("and/or " + recognizedIntervals.get(
+ recognizedIntervals.size() - 1));
}
+ out.write(" ...</p>\n");
out.flush();
+ /* If we have a database connection, search relays in the database. */
+ if (conn != null) {
+
+ StringBuilder query = new StringBuilder("SELECT validafter, "
+ + "rawdesc FROM statusentry WHERE ");
+ boolean addAnd = false;
+ if (searchDays.size() > 0 || searchMonths.size() > 0) {
+ boolean addOr = false;
+ query.append("(");
+ for (String search : searchDays) {
+ query.append((addOr ? "OR " : "") + "DATE_TRUNC('day', "
+ + "validafter) = '" + search + " 00:00:00' ");
+ addOr = true;
+ }
+ for (String search : searchMonths) {
+ query.append((addOr ? "OR " : "") + "DATE_TRUNC('month', "
+ + "validafter) = '" + search + "-01 00:00:00' ");
+ addOr = true;
+ }
+ query.append(") ");
+ } else {
+ query.append("DATE_TRUNC('day', validafter) >= '"
+ + dayFormat.format(started - 30L * 24L * 60L * 60L * 1000L)
+ + " 00:00:00' ");
+ }
+ if (searchNickname.length() > 0) {
+ query.append("AND LOWER(nickname) LIKE '"
+ + searchNickname.toLowerCase() + "%' ");
+ }
+ if (searchFingerprint.length() > 0) {
+ query.append("AND fingerprint LIKE '"
+ + searchFingerprint.toLowerCase() + "%' ");
+ }
+ if (searchIPAddress.length() > 0) {
+ query.append("AND address LIKE '" + searchIPAddress + "%' ");
+ }
+ for (String search : searchFingerprintOrNickname) {
+ query.append("AND (LOWER(nickname) LIKE '" + search.toLowerCase()
+ + "%' OR fingerprint LIKE '" + search.toLowerCase() + "%') ");
+ }
+ query.append("ORDER BY validafter DESC, fingerprint LIMIT 31");
+ int matches = 0;
+ long startedQuery = System.currentTimeMillis();
+ try {
+ Statement statement = conn.createStatement();
+ ResultSet rs = statement.executeQuery(query.toString());
+ String lastValidAfter = null;
+ while (rs.next()) {
+ matches++;
+ if (matches > 30) {
+ break;
+ }
+ String validAfter = rs.getTimestamp(1).toString().
+ substring(0, 19);
+ if (!validAfter.equals(lastValidAfter)) {
+ out.println(" <br/><tt>valid-after "
+ + "<a href=\"consensus?valid-after="
+ + validAfter.replaceAll(":", "-").replaceAll(" ", "-")
+ + "\" target=\"_blank\">" + validAfter + "</a></tt><br/>");
+ lastValidAfter = validAfter;
+ out.flush();
+ }
+ byte[] rawStatusEntry = rs.getBytes(2);
+ try {
+ String statusEntryLines = new String(rawStatusEntry,
+ "US-ASCII");
+ String[] lines = statusEntryLines.split("\n");
+ for (String line : lines) {
+ if (line.startsWith("r ")) {
+ String[] parts = line.split(" ");
+ String descriptor = String.format("%040x",
+ new BigInteger(1, Base64.decodeBase64(parts[3]
+ + "==")));
+ out.println(" <tt>r " + parts[1] + " " + parts[2] + " "
+ + "<a href=\"descriptor.html?desc-id=" + descriptor
+ + "\" target=\"_blank\">" + parts[3] + "</a> "
+ + parts[4] + " " + parts[5] + " " + parts[6] + " "
+ + parts[7] + " " + parts[8] + "</tt><br/>");
+ } else {
+ out.println(" <tt>" + line + "</tt><br/>");
+ }
+ }
+ out.println(" <br/>");
+ out.flush();
+ } catch (UnsupportedEncodingException e) {
+ /* This shouldn't happen, because we know that ASCII is
+ * supported. */
+ }
+ }
+ statement.close();
+ } catch (SQLException e) {
+ out.println("<p><font color=\"red\"><b>Warning: </b></font>We "
+ + "experienced an unknown database problem while running the "
+ + "search. The query was '" + query + "'. If this problem "
+ + "persists, please "
+ + "<a href=\"mailto:tor-assistants@xxxxxxxxxxxxx\">let us "
+ + "know</a>!</p>\n");
+ writeFooter(out);
+ return;
+ }
+
+ /* Display total search time on the results page. */
+ long searchTime = System.currentTimeMillis() - started;
+ long queryTime = System.currentTimeMillis() - startedQuery;
+ out.write(" <br/><p>Found " + (matches > 30 ? "more than 30"
+ : "" + matches) + " relays " + (matches > 30 ?
+ "(displaying only the first 30 hits) " : "") + "in "
+ + String.format("%d.%03d", searchTime / 1000, searchTime % 1000)
+ + " seconds.</p>\n");
+
+ /* Finish writing response. */
+ writeFooter(out);
+ return;
+ }
+
/* Compile a regular expression pattern to parse r lines more
* quickly. */
StringBuilder patternBuilder = new StringBuilder("r ");
--
1.7.1