[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [metrics-web/master] Skip long resource strings.



commit 2f00ddf47bae7e3f6f3d9a2776cfacabadfb58ca
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Wed Jan 25 14:21:16 2017 +0100

    Skip long resource strings.
    
    This patch fixes a bug where two resource strings sharing the same
    first 2048 characters were treated as distinct internally, even though
    the database considers them identical because it only stores the first
    2048 characters.  In reality, such strings are just hacking attempts or
    broken clients, so we may as well discard these lines entirely and not
    bother any further.
---
 .../main/java/org/torproject/metrics/webstats/Main.java |  3 ++-
 .../java/org/torproject/metrics/webstats/MainTest.java  | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
index ea5a368..4c02a0f 100644
--- a/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
+++ b/modules/webstats/src/main/java/org/torproject/metrics/webstats/Main.java
@@ -62,7 +62,8 @@ public class Main {
 
   static final Pattern LOG_LINE_PATTERN = Pattern.compile(
       "^0.0.0.[01] - - \\[\\d{2}/\\w{3}/\\d{4}:00:00:00 \\+0000\\] "
-      + "\"(GET|HEAD) ([^ ]+) HTTP[^ ]+\" (\\d+) (-|\\d+) \"-\" \"-\" -$");
+      + "\"(GET|HEAD) ([^ ]{1,2048}) HTTP[^ ]+\" (\\d+) (-|\\d+) \"-\" \"-\" "
+      + "-$");
 
   private static final String LOG_DATE = "log_date";
 
diff --git a/modules/webstats/src/test/java/org/torproject/metrics/webstats/MainTest.java b/modules/webstats/src/test/java/org/torproject/metrics/webstats/MainTest.java
index 1c4f0bc..7b59c54 100644
--- a/modules/webstats/src/test/java/org/torproject/metrics/webstats/MainTest.java
+++ b/modules/webstats/src/test/java/org/torproject/metrics/webstats/MainTest.java
@@ -4,6 +4,7 @@
 package org.torproject.metrics.webstats;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import org.junit.Test;
@@ -89,5 +90,21 @@ public class MainTest {
     assertEquals("/bubbles.html", matcher.group(2));
     assertEquals("200", matcher.group(3));
   }
+
+  @Test
+  public void testLogLinePatternMaxLength() {
+    int maxLength = 2048;
+    String pre = "0.0.0.0 - - [17/Jan/2017:00:00:00 +0000] \"GET ";
+    String post = " HTTP/1.0\" 200 10532 \"-\" \"-\" -";
+    StringBuilder sb = new StringBuilder();
+    while (sb.length() <= maxLength) {
+      sb.append("/https://www.torproject.org");
+    }
+    String tooLongLogLine = pre + sb.toString() + post;
+    assertFalse(Main.LOG_LINE_PATTERN.matcher(tooLongLogLine).matches());
+    String notTooLongLogLine = pre + sb.toString().substring(0, maxLength)
+        + post;
+    assertTrue(Main.LOG_LINE_PATTERN.matcher(notTooLongLogLine).matches());
+  }
 }
 



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits