[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-lib/master] Accommodate logs with more than Integer.MAX_VALUE lines.
commit 35feb816f81f26bcc9dc035a1aaf496c34a86647
Author: iwakeh <iwakeh@xxxxxxxxxxxxxx>
Date: Fri Feb 16 09:05:46 2018 +0000
Accommodate logs with more than Integer.MAX_VALUE lines.
Implements task-23046.
---
.../org/torproject/descriptor/LogDescriptor.java | 10 +++++--
.../torproject/descriptor/WebServerAccessLog.java | 6 ++++
.../descriptor/log/WebServerAccessLogImpl.java | 32 ++++++++++++++++++----
.../descriptor/log/LogDescriptorTest.java | 5 +++-
4 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/torproject/descriptor/LogDescriptor.java b/src/main/java/org/torproject/descriptor/LogDescriptor.java
index 826fcda..8dd8460 100644
--- a/src/main/java/org/torproject/descriptor/LogDescriptor.java
+++ b/src/main/java/org/torproject/descriptor/LogDescriptor.java
@@ -5,6 +5,7 @@ package org.torproject.descriptor;
import java.io.InputStream;
import java.util.List;
+import java.util.stream.Stream;
/**
* Contains a log file.
@@ -64,11 +65,14 @@ public interface LogDescriptor extends Descriptor {
public List<String> getUnrecognizedLines();
/**
- * Returns a list of all parseable log lines.
- * <p>Might require a lot of memory depending on log size.</p>
+ * Returns a stream of all parseable log lines.
+ * <p>Depending on log size this might not fit into a collection type.</p>
+ *
+ * @since 2.2.0
*/
- public List<? extends Line> logLines() throws DescriptorParseException;
+ public Stream<? extends Line> logLines() throws DescriptorParseException;
+ /** Base interface for accessing log lines. */
public interface Line {
/** Returns a log line string. */
diff --git a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
index b4f1940..5f3ad73 100644
--- a/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
+++ b/src/main/java/org/torproject/descriptor/WebServerAccessLog.java
@@ -62,6 +62,12 @@ public interface WebServerAccessLog extends LogDescriptor {
@Override
public List<String> getUnrecognizedLines();
+ /**
+ * Facilitates access to all log line fields that don't only contain
+ * default values post sanitization.
+ *
+ * @since 2.2.0
+ */
public interface Line extends LogDescriptor.Line {
/** Returns the IP address of the requesting host. */
diff --git a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
index e48a262..3666d5d 100644
--- a/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
+++ b/src/main/java/org/torproject/descriptor/log/WebServerAccessLogImpl.java
@@ -15,10 +15,11 @@ import java.io.File;
import java.io.InputStreamReader;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Implementation of web server access log descriptors.
@@ -128,15 +129,34 @@ public class WebServerAccessLogImpl extends LogDescriptorImpl
return this.logDate;
}
- /** Returns a list of all valid log lines. */
+ private static final int LISTLIMIT = Integer.MAX_VALUE / 2;
+
+ /** Returns a stream of all valid log lines. */
@Override
- public List<WebServerAccessLog.Line> logLines()
+ public Stream<WebServerAccessLog.Line> logLines()
throws DescriptorParseException {
try (BufferedReader br = new BufferedReader(new InputStreamReader(
this.decompressedByteStream()))) {
- return br.lines().map(line
- -> (WebServerAccessLog.Line) WebServerAccessLogLine.makeLine(line))
- .filter(line -> line.isValid()).collect(Collectors.toList());
+ List<List<WebServerAccessLogLine>> lists = new ArrayList<>();
+ List<WebServerAccessLogLine> currentList = new ArrayList<>();
+ lists.add(currentList);
+ String lineStr = br.readLine();
+ int count = 0;
+ while (null != lineStr) {
+ WebServerAccessLogLine wsal = WebServerAccessLogLine.makeLine(lineStr);
+ if (wsal.isValid()) {
+ currentList.add(wsal);
+ count++;
+ }
+ if (count >= LISTLIMIT) {
+ currentList = new ArrayList<>();
+ lists.add(currentList);
+ count = 0;
+ }
+ lineStr = br.readLine();
+ }
+ br.close();
+ return lists.stream().flatMap(list -> list.stream());
} catch (Exception ex) {
throw new DescriptorParseException("Cannot retrieve log lines.", ex);
}
diff --git a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
index 67ba638..0ff3e62 100644
--- a/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
+++ b/src/test/java/org/torproject/descriptor/log/LogDescriptorTest.java
@@ -3,6 +3,8 @@
package org.torproject.descriptor.log;
+import static java.util.stream.Collectors.toList;
+
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -130,7 +132,8 @@ public class LogDescriptorTest {
InternalLogDescriptor ld = (InternalLogDescriptor) descs.get(0);
assertEquals("Wrong compression type string. " + dataUsed(),
pan[4], ld.getCompressionType());
- List<? extends LogDescriptor.Line> lines = ld.logLines();
+ List<? extends LogDescriptor.Line> lines
+ = ld.logLines().collect(toList());
assertEquals(this.lineCount, lines.size());
}
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits