[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [collector/master] Add some real tests for the webstats module.
commit 3002d6bc6b6bf84953cf842cbf6b3b18dc944879
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Dec 11 12:16:05 2019 +0100
Add some real tests for the webstats module.
---
src/build | 2 +-
.../collector/webstats/SanitizeWeblogsTest.java | 290 ++++++++++++++++++++-
2 files changed, 287 insertions(+), 5 deletions(-)
diff --git a/src/build b/src/build
index eb16cb3..264e498 160000
--- a/src/build
+++ b/src/build
@@ -1 +1 @@
-Subproject commit eb16cb359db41722e6089bafb1e26808df4338df
+Subproject commit 264e498f54a20f7d299daaf2533d043f880e6a8b
diff --git a/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java b/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java
index a550c41..21617b5 100644
--- a/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java
+++ b/src/test/java/org/torproject/metrics/collector/webstats/SanitizeWeblogsTest.java
@@ -4,17 +4,299 @@
package org.torproject.metrics.collector.webstats;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.WebServerAccessLog;
+import org.torproject.metrics.collector.Main;
+import org.torproject.metrics.collector.conf.Configuration;
+import org.torproject.metrics.collector.conf.Key;
+
+import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
public class SanitizeWeblogsTest {
+ /** Sample original web server access logs as input for tests. */
+ private static final String[][] inputLogs = new String[][] {
+ { "metrics.torproject.org-access.log-20191120.gz",
+ "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] "
+ + "\"GET /networksize.html HTTP/1.1\" 200 3269 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] "
+ + "\"GET /networksize.png?start=2019-08-21&end=2019-11-19 HTTP/1.1\" "
+ + "200 39383 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [19/Nov/2019:00:00:00 +0000] "
+ + "\"GET /userstats-relay-country.html HTTP/1.1\" 200 7350 "
+ + "\"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/recent/relay-descriptors/ HTTP/1.1\" 200 10227 "
+ + "\"-\" \"-\" -\n" },
+ { "metrics.torproject.org-access.log-20191121.gz",
+ "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/relay-descriptors/microdescs/ "
+ + "HTTP/1.1\" 200 - \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/exit-lists/ HTTP/1.1\" 200 "
+ + "- \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/archive/bridge-descriptors/extra-infos/ "
+ + "HTTP/1.1\" 200 48013 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/cc/sk.png HTTP/1.1\" 200 395 \"-\" \"-\" -\n" },
+ { "metrics.torproject.org-access.log-20191122.gz",
+ "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/favicon.ico HTTP/1.1\" 200 1150 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/flags/authority.png HTTP/1.1\" 200 325 "
+ + "\"https://metrics.torproject.org/rs.html\" \"-\" -\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /news.atom HTTP/1.1\" 200 36362 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] "
+ + "\"GET /onionperf-buildtimes.csv HTTP/1.1\" 200 270336 "
+ + "\"-\" \"-\" -\n" },
+ { "metrics.torproject.org-access.log-20191123.gz",
+ "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] "
+ + "\"GET /userstats-relay-country.html?"
+ + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1\" "
+ + "200 35517 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] "
+ + "\"GET /userstats-relay-country.png?"
+ + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1\" "
+ + "200 28041 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [22/Nov/2019:00:00:00 +0000] "
+ + "\"GET /userstats-relay-country.png?"
+ + "start=2010-01-01&end=2019-11-22&country=vn&events=off HTTP/1.1\" "
+ + "200 28041 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [23/Nov/2019:00:00:00 +0000] \"GET / HTTP/1.1\" "
+ + "200 3336 \"-\" \"-\" -\n" }
+ };
+
+ /** Sanitized web server access logs as output of tests. */
+ private static final String[][] outputLogs = new String[][] {
+ { "metrics.torproject.org_meronense.torproject.org_"
+ + "access.log_20191120.xz",
+ "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/archive/bridge-descriptors/extra-infos/ "
+ + "HTTP/1.1\" 200 48013\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/recent/relay-descriptors/ HTTP/1.1\" 200 10227\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/exit-lists/ HTTP/1.1\" 200 -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/relay-descriptors/microdescs/ "
+ + "HTTP/1.1\" 200 -\n" },
+ { "metrics.torproject.org_meronense.torproject.org_"
+ + "access.log_20191121.xz",
+ "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/cc/sk.png HTTP/1.1\" 200 395\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/favicon.ico HTTP/1.1\" 200 1150\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /images/flags/authority.png HTTP/1.1\" 200 325\n"
+ + "0.0.0.0 - - [21/Nov/2019:00:00:00 +0000] "
+ + "\"GET /news.atom HTTP/1.1\" 200 36362\n" }
+ };
+
+ /** Temporary folder containing all files for this test. */
+ @Rule
+ public TemporaryFolder temporaryFolder = new TemporaryFolder();
+
+ /** Directory containing web server logs to sanitize. */
+ private Path inputDirectory;
+
+ /** Directory storing all intermediate state that needs to be preserved
+ * between processing runs. */
+ private Path statsDirectory;
+
+ /** Directory holding sanitized web server logs for tarballs. */
+ private Path outDirectory;
+
+ /** Directory holding recent sanitized web server logs. */
+ private Path recentDirectory;
+
+ /** CollecTor configuration for this test. */
+ private Configuration configuration;
+
+ /** Prepares the temporary folders and the configuration for this
+ * test. */
+ @Before
+ public void createTemporaryFolderAndBuilders()
+ throws IOException {
+ this.inputDirectory = this.temporaryFolder.newFolder("in",
+ "webstats", "meronense.torproject.org").toPath();
+ this.statsDirectory = this.temporaryFolder.newFolder("stats").toPath();
+ this.outDirectory = this.temporaryFolder.newFolder("out").toPath();
+ this.recentDirectory = this.temporaryFolder.newFolder("indexed", "recent")
+ .toPath();
+ this.initializeTestConfiguration();
+ }
+
+ /** Initializes a configuration for the web server log sanitizer. */
+ private void initializeTestConfiguration() throws IOException {
+ this.configuration = new Configuration();
+ this.configuration.load(getClass().getClassLoader().getResourceAsStream(
+ Main.CONF_FILE));
+ this.configuration.setProperty(Key.WebstatsActivated.name(), "true");
+ this.configuration.setProperty(Key.WebstatsLocalOrigins.name(),
+ this.inputDirectory.toString());
+ this.configuration.setProperty(Key.StatsPath.name(),
+ this.statsDirectory.toString());
+ this.configuration.setProperty(Key.RecentPath.name(),
+ this.recentDirectory.toString());
+ this.configuration.setProperty(Key.OutputPath.name(),
+ this.outDirectory.toString());
+ }
+
+ private void writeInputFiles(String[] ... inputLogs) throws IOException {
+ for (String[] inputLog : inputLogs) {
+ Path inputLogFile = this.inputDirectory.resolve(inputLog[0]);
+ if (!Files.exists(inputLogFile.getParent())) {
+ Files.createDirectories(inputLogFile.getParent());
+ }
+ try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+ new GzipCompressorOutputStream(
+ Files.newOutputStream(inputLogFile))))) {
+ bw.write(inputLog[1]);
+ }
+ }
+ }
+
+ private void deleteInputFiles(String[] ... deleteLogs) throws IOException {
+ for (String[] deleteLog : deleteLogs) {
+ Path deleteLogFile = this.inputDirectory.resolve(deleteLog[0]);
+ Files.delete(deleteLogFile);
+ }
+ }
+
+ private void sanitizeWeblogs() {
+ SanitizeWeblogs sw = new SanitizeWeblogs(this.configuration);
+ sw.startProcessing();
+ }
+
+ private void compareResults(String[] ... outputLogs)
+ throws DescriptorParseException {
+ SortedMap<String, WebServerAccessLog> parsedLogs = new TreeMap<>();
+ for (Descriptor descriptor
+ : DescriptorSourceFactory.createDescriptorReader()
+ .readDescriptors(this.recentDirectory.toFile())) {
+ if (!(descriptor instanceof WebServerAccessLog)) {
+ fail("Parsed descriptor of unknown type.");
+ } else {
+ WebServerAccessLog wsal = (WebServerAccessLog) descriptor;
+ parsedLogs.put(wsal.getDescriptorFile().getName(), wsal);
+ }
+ }
+ assertEquals(outputLogs.length, parsedLogs.size());
+ for (String[] outputLog : outputLogs) {
+ String expectedLogFilename = outputLog[0];
+ List<String> expectedLogLines = Arrays.asList(outputLog[1].split("\n"));
+ assertTrue(parsedLogs.containsKey(expectedLogFilename));
+ List<String> actualLogLines = new ArrayList<>();
+ parsedLogs.get(expectedLogFilename).logLines()
+ .forEach((line) -> actualLogLines.add(line.toString()));
+ assertEquals(expectedLogLines, actualLogLines);
+ }
+ }
+
+ @Test
+ public void testSingleRun() throws Exception {
+ this.writeInputFiles(inputLogs);
+ this.sanitizeWeblogs();
+ this.compareResults(outputLogs);
+ }
+
@Test
- public void bytesForTest() {
- String lines = "line\nline\nline\nline\nline\n"
- + "line\nline\nline\nline\nline\n";
- assertEquals(lines, new String(SanitizeWeblogs.bytesFor("line", 10)));
+ public void testSubsequentRuns() throws Exception {
+ for (String[] inputLog : inputLogs) {
+ this.writeInputFiles(inputLog);
+ this.sanitizeWeblogs();
+ }
+ this.compareResults(outputLogs);
}
+ @Test
+ public void testSubsequentRunsReverseOrder() throws Exception {
+ for (int i = inputLogs.length - 1; i >= 0; i--) {
+ this.writeInputFiles(inputLogs[i]);
+ this.sanitizeWeblogs();
+ }
+ this.compareResults(outputLogs);
+ }
+
+ @Test
+ public void testSlidingWindow() throws Exception {
+ this.writeInputFiles(inputLogs[0], inputLogs[1], inputLogs[2]);
+ this.sanitizeWeblogs();
+ this.compareResults(outputLogs[0]);
+ this.deleteInputFiles(inputLogs[0]);
+ this.writeInputFiles(inputLogs[3]);
+ this.sanitizeWeblogs();
+ this.compareResults(outputLogs);
+ }
+
+ @Test
+ public void testSingleDayNoLimit() throws Exception {
+ this.configuration.setProperty(Key.WebstatsLimits.name(), "false");
+ this.writeInputFiles(new String[][] {
+ { "metrics.torproject.org-access.log-20191120.gz",
+ "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/recent/relay-descriptors/ "
+ + "HTTP/1.1\" 200 10227 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/relay-descriptors/microdescs/ "
+ + "HTTP/1.1\" 200 - \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"HEAD /collector/recent/exit-lists/ "
+ + "HTTP/1.1\" 200 - \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] "
+ + "\"GET /collector/archive/bridge-descriptors/extra-infos/ "
+ + "HTTP/1.1\" 200 48013 \"-\" \"-\" -\n" } });
+ this.sanitizeWeblogs();
+ this.compareResults(outputLogs[0]);
+ }
+
+ @Test
+ public void testErrorLog() throws Exception {
+ this.configuration.setProperty(Key.WebstatsLimits.name(), "false");
+ this.writeInputFiles(new String[][] {
+ { "metrics.torproject.org-error.log-20191121.gz",
+ "[Thu Nov 21 15:13:15.211234 2019] [authz_core:error] "
+ + "[pid 12920:tid 139635582793920] [client 127.0.0.1:59912]\n" } });
+ this.sanitizeWeblogs();
+ this.compareResults();
+ }
+
+ @Test
+ public void testNonMatchingLines() throws Exception {
+ this.configuration.setProperty(Key.WebstatsLimits.name(), "false");
+ this.writeInputFiles(new String[][] {
+ { "metrics.torproject.org-access.log-20191121.gz",
+ "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] \"GET /favicon.ico "
+ + "HTTP/1.1\" 404 8903 \"-\" \"-\" -\n"
+ + "0.0.0.0 - - [20/Nov/2019:00:00:00 +0000] \"POST /con.php "
+ + "HTTP/1.1\" 301 320 \"http://metrics.torproject.org/con.php\" "
+ + "\"-\" -\n"
+ + "[Thu Nov 21 15:13:15.211234 2019] [authz_core:error] "
+ + "[pid 12920:tid 139635582793920] [client 127.0.0.1:59912]\n" } });
+ this.sanitizeWeblogs();
+ this.compareResults();
+ }
}
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits