[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-web/release] Adapt webstats to read logs from CollecTor.
commit 7be397890d2ff66d7479b52aa245afdd3d487f9d
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Thu Mar 22 21:37:28 2018 +0100
Adapt webstats to read logs from CollecTor.
In this change we:
- update metrics-lib to 2.2.0,
- start downloading and processing logs from CollecTor rather than
from webstats.torproject.org,
- change log line counts from int to long,
- remove webstats tests which are now contained in metrics-lib,
- update the CollecTor page, and
- take out the beta notice from the Tor web server logs page.
Implements #25520.
---
build.xml | 2 +-
.../metrics/stats/collectdescs/Main.java | 3 +-
.../torproject/metrics/stats/webstats/Main.java | 223 ++++++---------------
src/main/resources/spec/web-server-logs.xml | 4 -
src/main/resources/web/jsps/collector.jsp | 38 ++++
src/main/resources/web/jsps/web-server-logs.jsp | 16 +-
src/main/sql/webstats/init-webstats.sql | 2 +-
.../metrics/stats/webstats/MainTest.java | 110 ----------
8 files changed, 106 insertions(+), 292 deletions(-)
diff --git a/build.xml b/build.xml
index e98757e..57eab68 100644
--- a/build.xml
+++ b/build.xml
@@ -9,7 +9,7 @@
<property name="javadoc-title" value="MetricsWeb API Documentation"/>
<property name="implementation-title" value="metrics-web" />
<property name="release.version" value="1.0.3-dev" />
- <property name="metricslibversion" value="2.1.1" />
+ <property name="metricslibversion" value="2.2.0" />
<property name="jetty.version" value="-9.2.21.v20170120" />
<property name="warfile"
value="metrics-web-${release.version}.war"/>
diff --git a/src/main/java/org/torproject/metrics/stats/collectdescs/Main.java b/src/main/java/org/torproject/metrics/stats/collectdescs/Main.java
index 04dc86d..4c64425 100644
--- a/src/main/java/org/torproject/metrics/stats/collectdescs/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/collectdescs/Main.java
@@ -24,7 +24,8 @@ public class Main {
"/recent/relay-descriptors/consensuses/",
"/recent/relay-descriptors/extra-infos/",
"/recent/relay-descriptors/server-descriptors/",
- "/recent/torperf/"
+ "/recent/torperf/",
+ "/recent/webstats/"
}, 0L, new File("../../shared/in"), true);
}
}
diff --git a/src/main/java/org/torproject/metrics/stats/webstats/Main.java b/src/main/java/org/torproject/metrics/stats/webstats/Main.java
index f70963f..5d11114 100644
--- a/src/main/java/org/torproject/metrics/stats/webstats/Main.java
+++ b/src/main/java/org/torproject/metrics/stats/webstats/Main.java
@@ -3,14 +3,19 @@
package org.torproject.metrics.stats.webstats;
-import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import static java.util.stream.Collectors.counting;
+import static java.util.stream.Collectors.groupingByConcurrent;
+
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.WebServerAccessLog;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.BufferedReader;
+import java.io.File;
import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -23,19 +28,17 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
-import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Calendar;
-import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/** Main class of the webstats module that downloads log files from the server,
* imports them into a database, and exports aggregate statistics to a CSV
@@ -45,26 +48,6 @@ public class Main {
/** Logger for this class. */
private static Logger log = LoggerFactory.getLogger(Main.class);
- /** Pattern for links contained in directory listings. */
- static final Pattern URL_STRING_PATTERN =
- Pattern.compile(".*<a href=\"([^\"]+)\">.*");
-
- static final Pattern LOG_FILE_URL_PATTERN =
- Pattern.compile("^.*/([^/]+)/([^/]+)-access.log-(\\d{8}).xz$");
-
- private static DateFormat logDateFormat;
-
- static {
- logDateFormat = new SimpleDateFormat("yyyyMMdd");
- logDateFormat.setLenient(false);
- logDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- }
-
- static final Pattern LOG_LINE_PATTERN = Pattern.compile(
- "^0.0.0.[01] - - \\[\\d{2}/\\w{3}/\\d{4}:00:00:00 \\+0000\\] "
- + "\"(GET|HEAD) ([^ ]{1,2048}) HTTP[^ ]+\" (\\d+) (-|\\d+) \"-\" \"-\" "
- + "-$");
-
private static final String LOG_DATE = "log_date";
private static final String REQUEST_TYPE = "request_type";
@@ -88,12 +71,10 @@ public class Main {
log.info("Starting webstats module.");
String dbUrlString = "jdbc:postgresql:webstats";
Connection connection = connectToDatabase(dbUrlString);
- SortedSet<String> previouslyImportedLogFileUrls =
- queryImportedFiles(connection);
- String baseUrl = "https://webstats.torproject.org/out/";
- SortedSet<String> newLogFileUrls = downloadDirectoryListings(baseUrl,
- previouslyImportedLogFileUrls);
- importLogFiles(connection, newLogFileUrls);
+ SortedSet<String> skipFiles = queryImportedFileNames(connection);
+ importLogFiles(connection, skipFiles,
+ new File("../../shared/in/recent/webstats"),
+ new File("../../shared/in/archive/webstats"));
SortedSet<String> statistics = queryWebstats(connection);
writeStatistics(Paths.get("stats", "webstats.csv"), statistics);
disconnectFromDatabase(connection);
@@ -109,79 +90,55 @@ public class Main {
return connection;
}
- static SortedSet<String> queryImportedFiles(Connection connection)
+ static SortedSet<String> queryImportedFileNames(Connection connection)
throws SQLException {
- log.info("Querying URLs of previously imported log files.");
+ log.info("Querying previously imported log files.");
SortedSet<String> importedLogFileUrls = new TreeSet<>();
Statement st = connection.createStatement();
- String queryString = "SELECT url FROM files";
+ String queryString = "SELECT server, site, log_date FROM files";
+ DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern("yyyyMMdd");
try (ResultSet rs = st.executeQuery(queryString)) {
while (rs.next()) {
- importedLogFileUrls.add(rs.getString(1));
+ importedLogFileUrls.add(String.format("%s_%s_access.log_%s.xz",
+ rs.getString(1), rs.getString(2),
+ rs.getDate(3).toLocalDate().format(dateFormat)));
}
}
- log.info("Found {} URLs of previously imported log files.",
+ log.info("Found {} previously imported log files.",
importedLogFileUrls.size());
return importedLogFileUrls;
}
- static SortedSet<String> downloadDirectoryListings(String baseUrl,
- SortedSet<String> importedLogFileUrls) throws IOException {
- log.info("Downloading directory listings from {}.", baseUrl);
- List<String> directoryListings = new ArrayList<>();
- directoryListings.add(baseUrl);
- SortedSet<String> newLogFileUrls = new TreeSet<>();
- while (!directoryListings.isEmpty()) {
- String urlString = directoryListings.remove(0);
- if (urlString.endsWith("/")) {
- directoryListings.addAll(downloadDirectoryListing(urlString));
- } else if (!urlString.endsWith(".xz")) {
- log.debug("Skipping unrecognized URL {}.", urlString);
- } else if (!importedLogFileUrls.contains(urlString)) {
- newLogFileUrls.add(urlString);
+ static void importLogFiles(Connection connection, SortedSet<String> skipFiles,
+ File... inDirectories) {
+ for (Descriptor descriptor : DescriptorSourceFactory
+ .createDescriptorReader().readDescriptors(inDirectories)) {
+ if (!(descriptor instanceof WebServerAccessLog)) {
+ continue;
}
- }
- log.info("Found {} URLs of log files that have not yet been imported.",
- newLogFileUrls.size());
- return newLogFileUrls;
- }
-
- static List<String> downloadDirectoryListing(String urlString)
- throws IOException {
- log.debug("Downloading directory listing from {}.", urlString);
- List<String> urlStrings = new ArrayList<>();
- try (BufferedReader br = new BufferedReader(new InputStreamReader(
- new URL(urlString).openStream()))) {
- String line;
- while ((line = br.readLine()) != null) {
- Matcher matcher = URL_STRING_PATTERN.matcher(line);
- if (matcher.matches() && !matcher.group(1).startsWith("/")) {
- urlStrings.add(urlString + matcher.group(1));
- }
+ WebServerAccessLog logFile = (WebServerAccessLog) descriptor;
+ if (skipFiles.contains(logFile.getDescriptorFile().getName())) {
+ continue;
}
- }
- return urlStrings;
- }
-
- static void importLogFiles(Connection connection,
- SortedSet<String> newLogFileUrls) {
- log.info("Downloading, parsing, and importing {} log files.",
- newLogFileUrls.size());
- for (String urlString : newLogFileUrls) {
try {
- Object[] metaData = parseMetaData(urlString);
- if (metaData == null) {
- continue;
- }
- Map<String, Integer> parsedLogLines = downloadAndParseLogFile(
- urlString);
- importLogLines(connection, urlString, metaData, parsedLogLines);
- } catch (IOException | ParseException exc) {
- log.warn("Cannot download or parse log file with URL {}. Retrying "
- + "in the next run.", urlString, exc);
+ Map<String, Long> parsedLogLines = logFile.logLines().parallel()
+ /* The following mapping can be removed with metrics-lib
+ version > 2.2.0 */
+ .map(line -> (WebServerAccessLog.Line) line)
+ .collect(groupingByConcurrent(line
+ -> String.format("%s %s %d", line.getMethod().name(),
+ truncateString(line.getRequest(), 2048), line.getResponse()),
+ counting()));
+ importLogLines(connection, logFile.getDescriptorFile().getName(),
+ logFile.getPhysicalHost(), logFile.getVirtualHost(),
+ logFile.getLogDate(), parsedLogLines);
+ } catch (DescriptorParseException exc) {
+ log.warn("Cannot parse log file with file name {}. Retrying in the "
+ + "next run.", logFile.getDescriptorFile().getName(), exc);
} catch (SQLException exc) {
- log.warn("Cannot import log file with URL {} into the database. "
- + "Rolling back and retrying in the next run.", urlString, exc);
+ log.warn("Cannot import log file with file name {} into the database. "
+ + "Rolling back and retrying in the next run.",
+ logFile.getDescriptorFile().getName(), exc);
try {
connection.rollback();
} catch (SQLException exceptionWhileRollingBack) {
@@ -191,68 +148,9 @@ public class Main {
}
}
- private static Object[] parseMetaData(String urlString)
- throws ParseException {
- log.debug("Importing log file {}.", urlString);
- if (urlString.contains("-ssl-access.log-")) {
- log.debug("Skipping log file containing SSL requests with URL {}.",
- urlString);
- return null;
- }
- Matcher logFileUrlMatcher = LOG_FILE_URL_PATTERN.matcher(urlString);
- if (!logFileUrlMatcher.matches()) {
- log.debug("Skipping log file with unrecognized URL {}.", urlString);
- return null;
- }
- String server = logFileUrlMatcher.group(1);
- String site = logFileUrlMatcher.group(2);
- long logDateMillis = logDateFormat.parse(logFileUrlMatcher.group(3))
- .getTime();
- return new Object[] { server, site, logDateMillis };
- }
-
- static Map<String, Integer> downloadAndParseLogFile(String urlString)
- throws IOException {
- int skippedLines = 0;
- Map<String, Integer> parsedLogLines = new HashMap<>();
- try (BufferedReader br = new BufferedReader(new InputStreamReader(
- new XZCompressorInputStream(new URL(urlString).openStream())))) {
- String line;
- while ((line = br.readLine()) != null) {
- if (!parseLogLine(line, parsedLogLines)) {
- skippedLines++;
- }
- }
- }
- if (skippedLines > 0) {
- log.debug("Skipped {} lines while parsing log file {}.", skippedLines,
- urlString);
- }
- return parsedLogLines;
- }
-
- static boolean parseLogLine(String logLine,
- Map<String, Integer> parsedLogLines) {
- Matcher logLineMatcher = LOG_LINE_PATTERN.matcher(logLine);
- if (!logLineMatcher.matches()) {
- return false;
- }
- String method = logLineMatcher.group(1);
- String resource = logLineMatcher.group(2);
- int responseCode = Integer.parseInt(logLineMatcher.group(3));
- String combined = String.format("%s %s %d", method, resource,
- responseCode);
- if (!parsedLogLines.containsKey(combined)) {
- parsedLogLines.put(combined, 1);
- } else {
- parsedLogLines.put(combined, parsedLogLines.get(combined) + 1);
- }
- return true;
- }
-
private static void importLogLines(Connection connection, String urlString,
- Object[] metaData, Map<String, Integer> parsedLogLines)
- throws SQLException {
+ String server, String site, LocalDate logDate,
+ Map<String, Long> parsedLogLines) throws SQLException {
PreparedStatement psFiles = connection.prepareStatement(
"INSERT INTO files (url, server, site, " + LOG_DATE + ") "
+ "VALUES (?, ?, ?, ?)", Statement.RETURN_GENERATED_KEYS);
@@ -264,20 +162,17 @@ public class Main {
PreparedStatement psRequests = connection.prepareStatement(
"INSERT INTO requests (file_id, method, resource_id, response_code, "
+ COUNT + ") VALUES (?, CAST(? AS method), ?, ?, ?)");
- String server = (String) metaData[0];
- String site = (String) metaData[1];
- long logDateMillis = (long) metaData[2];
- int fileId = insertFile(psFiles, urlString, server, site, logDateMillis);
+ int fileId = insertFile(psFiles, urlString, server, site, logDate);
if (fileId < 0) {
log.debug("Skipping previously imported log file {}.", urlString);
return;
}
- for (Map.Entry<String, Integer> requests : parsedLogLines.entrySet()) {
+ for (Map.Entry<String, Long> requests : parsedLogLines.entrySet()) {
String[] keyParts = requests.getKey().split(" ");
String method = keyParts[0];
String resource = keyParts[1];
int responseCode = Integer.parseInt(keyParts[2]);
- int count = requests.getValue();
+ long count = requests.getValue();
int resourceId = insertResource(psResourcesSelect, psResourcesInsert,
resource);
if (resourceId < 0) {
@@ -290,18 +185,18 @@ public class Main {
count);
}
connection.commit();
- log.debug("Finished importing log file with URL {} into database.",
+ log.debug("Finished importing log file with file name {} into database.",
urlString);
}
private static int insertFile(PreparedStatement psFiles, String urlString,
- String server, String site, long logDateMillis) throws SQLException {
+ String server, String site, LocalDate logDate) throws SQLException {
int fileId = -1;
psFiles.clearParameters();
psFiles.setString(1, truncateString(urlString, 2048));
psFiles.setString(2, truncateString(server, 32));
psFiles.setString(3, truncateString(site, 128));
- psFiles.setDate(4, new Date(logDateMillis));
+ psFiles.setDate(4, Date.valueOf(logDate));
psFiles.execute();
try (ResultSet rs = psFiles.getGeneratedKeys()) {
if (rs.next()) {
@@ -312,14 +207,14 @@ public class Main {
}
private static void insertRequest(PreparedStatement psRequests, int fileId,
- String method, int resourceId, int responseCode, int count)
+ String method, int resourceId, int responseCode, long count)
throws SQLException {
psRequests.clearParameters();
psRequests.setInt(1, fileId);
psRequests.setString(2, method);
psRequests.setInt(3, resourceId);
psRequests.setInt(4, responseCode);
- psRequests.setInt(5, count);
+ psRequests.setLong(5, count);
psRequests.execute();
}
diff --git a/src/main/resources/spec/web-server-logs.xml b/src/main/resources/spec/web-server-logs.xml
index c180f8c..5c2011f 100644
--- a/src/main/resources/spec/web-server-logs.xml
+++ b/src/main/resources/spec/web-server-logs.xml
@@ -20,10 +20,6 @@
</front>
<middle>
<section title="Purpose of this document">
- <t>BETA: As of November 14, 2017, this document is still under
- discussion and subject to change without prior notice. Feel free
- to <eref target="/about.html#contact">contact us</eref> for questions or
- concerns regarding this document.</t>
<t>Tor's web servers, like most web servers, keep request logs for
maintenance and informational purposes.</t>
<t>However, unlike most other web servers, Tor's web servers use a
diff --git a/src/main/resources/web/jsps/collector.jsp b/src/main/resources/web/jsps/collector.jsp
index 33ae7dd..13865ba 100644
--- a/src/main/resources/web/jsps/collector.jsp
+++ b/src/main/resources/web/jsps/collector.jsp
@@ -168,6 +168,15 @@
<td><a href="/collector/recent/torperf/" class="btn btn-primary btn-xs pull-left"><i class="fa fa-chevron-right" aria-hidden="true"></i> recent</a>
<a href="/collector/archive/torperf/" class="btn btn-primary btn-xs pull-right"><i class="fa fa-chevron-right" aria-hidden="true"></i> archive</a></td>
</tr>
+<tr class="tableHeadline">
+ <td colspan="3"><b><a href="#webstats">Tor web server logs</a></b></td>
+</tr>
+<tr>
+ <td><a href="#type-webstats">Tor web server logs</a></td>
+ <td></td>
+ <td><a href="/collector/recent/webstats/" class="btn btn-primary btn-xs pull-left"><i class="fa fa-chevron-right" aria-hidden="true"></i> recent</a>
+ <a href="/collector/archive/webstats/" class="btn btn-primary btn-xs pull-right"><i class="fa fa-chevron-right" aria-hidden="true"></i> archive</a></td>
+</tr>
</tbody>
</table>
@@ -694,6 +703,35 @@ measurement; optional.</li>
<li><code>SOURCEADDRESS:</code> Public IP address of the OnionPerf host obtained by connecting to well-known servers and finding the IP address in the result, which may be <code>"unknown"</code> if OnionPerf was not able to find this information; optional.</li>
</ul>
+
+
+<br>
+<h2 id="webstats" class="hover">Tor web server logs
+<a href="#webstats" class="anchor">#</a>
+</h2>
+
+<p>
+Tor's web servers, like most web servers, keep request logs for maintenance and
+informational purposes.
+However, unlike most other web servers, Tor's web servers use a privacy-aware
+log format that avoids logging too sensitive data about their users.
+Also unlike most other web server logs, Tor's logs are neither archived nor
+analyzed before performing a number of post-processing steps to further reduce
+any privacy-sensitive parts.
+</p>
+
+<h3 id="type-webstats" class="hover">Tor web server logs
+<a href="/collector/recent/webstats/" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> recent</a>
+<a href="/collector/archive/webstats/" class="btn btn-primary btn-xs"><i class="fa fa-chevron-right" aria-hidden="true"></i> archive</a>
+<a href="#type-webstats" class="anchor">#</a>
+</h3>
+
+<p>
+The data format and sanitizing steps for Tor web server logs are specified in
+detail on a separate <a href="web-server-logs.html">page</a>.
+</p>
+
+
</div>
<br>
diff --git a/src/main/resources/web/jsps/web-server-logs.jsp b/src/main/resources/web/jsps/web-server-logs.jsp
index 8832b2a..530b2ab 100644
--- a/src/main/resources/web/jsps/web-server-logs.jsp
+++ b/src/main/resources/web/jsps/web-server-logs.jsp
@@ -22,37 +22,31 @@
"#rfc.section.1">1.</a> <a href=
"#n-purpose-of-this-document">Purpose of this document</a></h2>
<div id="rfc.section.1.p.1">
-<p>BETA: As of November 14, 2017, this document is still under
-discussion and subject to change without prior notice. Feel free to
-<a href="/about.html#contact">contact us</a> for questions or
-concerns regarding this document.</p>
-</div>
-<div id="rfc.section.1.p.2">
<p>Tor's web servers, like most web servers, keep request logs for
maintenance and informational purposes.</p>
</div>
-<div id="rfc.section.1.p.3">
+<div id="rfc.section.1.p.2">
<p>However, unlike most other web servers, Tor's web servers use a
privacy-aware log format that avoids logging too sensitive data
about their users.</p>
</div>
-<div id="rfc.section.1.p.4">
+<div id="rfc.section.1.p.3">
<p>Also unlike most other web server logs, Tor's logs are neither
archived nor analyzed before performing a number of post-processing
steps to further reduce any privacy-sensitive parts.</p>
</div>
-<div id="rfc.section.1.p.5">
+<div id="rfc.section.1.p.4">
<p>This document describes 1) meta-data contained in log file names
written by Tor's web servers, 2) the privacy-aware log format used
in these files, and 3) subsequent sanitizing steps that are applied
before archiving and analyzing these log files.</p>
</div>
-<div id="rfc.section.1.p.6">
+<div id="rfc.section.1.p.5">
<p>As a basis for our current implementation this document also
describes the naming conventions for the input log files, which is
just a description of the current state and subject to change.</p>
</div>
-<div id="rfc.section.1.p.7">
+<div id="rfc.section.1.p.6">
<p>As a convention for this document, all format strings conform to
the format strings used by <a href=
"http://httpd.apache.org/docs/current/mod/mod_log_config.html">Apache's
diff --git a/src/main/sql/webstats/init-webstats.sql b/src/main/sql/webstats/init-webstats.sql
index e44205f..1396fa5 100644
--- a/src/main/sql/webstats/init-webstats.sql
+++ b/src/main/sql/webstats/init-webstats.sql
@@ -22,7 +22,7 @@ CREATE TABLE requests (
method METHOD NOT NULL,
resource_id INTEGER REFERENCES resources (resource_id) NOT NULL,
response_code SMALLINT NOT NULL,
- count INTEGER NOT NULL,
+ count BIGINT NOT NULL,
UNIQUE (file_id, method, resource_id, response_code)
);
diff --git a/src/test/java/org/torproject/metrics/stats/webstats/MainTest.java b/src/test/java/org/torproject/metrics/stats/webstats/MainTest.java
deleted file mode 100644
index a4e88d1..0000000
--- a/src/test/java/org/torproject/metrics/stats/webstats/MainTest.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Copyright 2017--2018 The Tor Project
- * See LICENSE for licensing information */
-
-package org.torproject.metrics.stats.webstats;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
-import java.util.regex.Matcher;
-
-public class MainTest {
-
- static final String SAMPLE_LOG_FILE_NAME =
- "metrics.torproject.org-access.log-20170117.xz";
-
- static final String SAMPLE_SUBDIRECTORY_NAME = "meronense.torproject.org/";
-
- static final String SAMPLE_LOG_FILE_URL =
- "https://webstats.torproject.org/out/meronense.torproject.org/"
- + "metrics.torproject.org-access.log-20170117.xz";
-
- static final String[] SAMPLE_LOG_LINES = new String[] {
- "0.0.0.0 - - [17/Jan/2017:00:00:00 +0000] "
- + "\"GET / HTTP/1.0\" 200 10532 \"-\" \"-\" -",
- "0.0.0.0 - - [17/Jan/2017:00:00:00 +0000] "
- + "\"HEAD /bubbles.html HTTP/1.1\" 200 - \"-\" \"-\" -"
- };
-
- @Test
- public void testUrlStringPatternComplete() {
- Matcher matcher = Main.URL_STRING_PATTERN.matcher(
- "<img src=\"/icons/unknown.gif\" alt=\"[ ]\"> "
- + "<a href=\"" + SAMPLE_LOG_FILE_NAME + "\">" + SAMPLE_LOG_FILE_NAME
- + "</a> 2017-01-19 19:43 5.6K ");
- assertTrue(matcher.matches());
- assertEquals(SAMPLE_LOG_FILE_NAME, matcher.group(1));
- }
-
- @Test
- public void testUrlStringPatternOnlyATag() {
- Matcher matcher = Main.URL_STRING_PATTERN.matcher("<a href=\""
- + SAMPLE_LOG_FILE_NAME + "\">" + SAMPLE_LOG_FILE_NAME + "</a>");
- assertTrue(matcher.matches());
- assertEquals(SAMPLE_LOG_FILE_NAME, matcher.group(1));
- }
-
- @Test
- public void testUrlStringPatternSubdirectory() {
- Matcher matcher = Main.URL_STRING_PATTERN.matcher(
- "<a href=\"" + SAMPLE_SUBDIRECTORY_NAME + "\">"
- + SAMPLE_SUBDIRECTORY_NAME + "/</a>");
- assertTrue(matcher.matches());
- assertEquals(SAMPLE_SUBDIRECTORY_NAME, matcher.group(1));
- }
-
- @Test
- public void testUrlStringPatternAnythingBetweenDoubleQuotesHtml() {
- Matcher matcher = Main.URL_STRING_PATTERN.matcher(
- "<a href=\"anything-between-double-quotes.html\">Link/</a>");
- assertTrue(matcher.matches());
- assertEquals("anything-between-double-quotes.html", matcher.group(1));
- }
-
- @Test
- public void testLogFileUrlPatternComplete() {
- Matcher matcher = Main.LOG_FILE_URL_PATTERN.matcher(SAMPLE_LOG_FILE_URL);
- assertTrue(matcher.matches());
- assertEquals("meronense.torproject.org", matcher.group(1));
- assertEquals("metrics.torproject.org", matcher.group(2));
- assertEquals("20170117", matcher.group(3));
- }
-
- @Test
- public void testLogLinePatternGetSlash() {
- Matcher matcher = Main.LOG_LINE_PATTERN.matcher(SAMPLE_LOG_LINES[0]);
- assertTrue(matcher.matches());
- assertEquals("GET", matcher.group(1));
- assertEquals("/", matcher.group(2));
- assertEquals("200", matcher.group(3));
- }
-
- @Test
- public void testLogLinePatternHeadBubbles() {
- Matcher matcher = Main.LOG_LINE_PATTERN.matcher(SAMPLE_LOG_LINES[1]);
- assertTrue(matcher.matches());
- assertEquals("HEAD", matcher.group(1));
- assertEquals("/bubbles.html", matcher.group(2));
- assertEquals("200", matcher.group(3));
- }
-
- @Test
- public void testLogLinePatternMaxLength() {
- int maxLength = 2048;
- String pre = "0.0.0.0 - - [17/Jan/2017:00:00:00 +0000] \"GET ";
- String post = " HTTP/1.0\" 200 10532 \"-\" \"-\" -";
- StringBuilder sb = new StringBuilder();
- while (sb.length() <= maxLength) {
- sb.append("/https://www.torproject.org");
- }
- String tooLongLogLine = pre + sb.toString() + post;
- assertFalse(Main.LOG_LINE_PATTERN.matcher(tooLongLogLine).matches());
- String notTooLongLogLine = pre + sb.toString().substring(0, maxLength)
- + post;
- assertTrue(Main.LOG_LINE_PATTERN.matcher(notTooLongLogLine).matches());
- }
-}
-
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits