[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [collector/master] Download .tpf files from OnionPerf hosts.
commit 6dd06f3f298ffd3b64abfd28214944f9f3cc01a9
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Wed Mar 8 16:26:24 2017 +0100
Download .tpf files from OnionPerf hosts.
Implements #21272.
---
CHANGELOG.md | 3 +
.../java/org/torproject/collector/conf/Key.java | 3 +-
.../collector/torperf/TorperfDownloader.java | 237 ++++++++++++++++++++-
src/main/resources/collector.properties | 7 +
.../collector/conf/ConfigurationTest.java | 2 +-
5 files changed, 249 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2365447..5e1107f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
# Changes in version 1.?.? - 2017-??-??
+ * Major changes
+ - Download .tpf files from OnionPerf hosts.
+
* Medium changes
- Clean up files in recent/exit-lists/ again.
diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java
index 0274c98..dd35322 100644
--- a/src/main/java/org/torproject/collector/conf/Key.java
+++ b/src/main/java/org/torproject/collector/conf/Key.java
@@ -57,7 +57,8 @@ public enum Key {
ReplaceIpAddressesWithHashes(Boolean.class),
BridgeDescriptorMappingsLimit(Integer.class),
TorperfFilesLines(String[].class),
- TorperfHosts(String[][].class);
+ TorperfHosts(String[][].class),
+ OnionPerfHosts(URL[].class);
private Class clazz;
private static Set<String> keys;
diff --git a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
index b09a6d6..2cd99df 100644
--- a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
+++ b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
@@ -8,6 +8,11 @@ import org.torproject.collector.conf.Configuration;
import org.torproject.collector.conf.ConfigurationException;
import org.torproject.collector.conf.Key;
import org.torproject.collector.cron.CollecTorMain;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.TorperfResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -18,21 +23,34 @@ import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.text.DateFormat;
+import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.SortedMap;
+import java.util.SortedSet;
import java.util.Stack;
import java.util.TimeZone;
import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/* Download possibly truncated Torperf .data and .extradata files from
* configured sources, append them to the files we already have, and merge
- * the two files into the .tpf format. */
+ * the two files into the .tpf format;
+ * also download .tpf files from OnionPerf hosts. */
public class TorperfDownloader extends CollecTorMain {
private static final Logger logger = LoggerFactory.getLogger(
@@ -50,6 +68,24 @@ public class TorperfDownloader extends CollecTorMain {
private SimpleDateFormat dateFormat;
private File torperfLastMergedFile;
+ /** File containing the download history. Keeping this history is
+ * necessary because OnionPerf does not delete older .tpf files; it is
+ * also what enables us to delete older files on our side. */
+ private File onionPerfDownloadedFile;
+
+ /** Full URLs of .tpf files downloaded in the current or in past
+ * executions; used to skip files that were downloaded before. */
+ private SortedSet<String> downloadedTpfFiles = new TreeSet<>();
+
+ /** Base URLs of configured OnionPerf hosts, read from the OnionPerfHosts
+ * configuration key. */
+ private URL[] onionPerfHosts = null;
+
+ /** Directory for storing archived .tpf files, located below the
+ * configured output path. */
+ private File archiveDirectory = null;
+
+ /** Directory for storing recent .tpf files, located below the configured
+ * recent path. */
+ private File recentDirectory = null;
+
@Override
public String module() {
return TORPERF;
@@ -81,6 +117,20 @@ public class TorperfDownloader extends CollecTorMain {
}
this.writeLastMergedTimestamps();
+ this.onionPerfDownloadedFile =
+ new File(config.getPath(Key.StatsPath).toFile(),
+ "onionperf-downloaded");
+ this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts);
+ this.readDownloadedOnionPerfTpfFiles();
+ this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(),
+ TORPERF);
+ this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(),
+ TORPERF);
+ for (URL baseUrl : this.onionPerfHosts) {
+ this.downloadFromOnionPerfHost(baseUrl);
+ }
+ this.writeDownloadedOnionPerfTpfFiles();
+
this.cleanUpRsyncDirectory();
}
@@ -617,6 +667,191 @@ public class TorperfDownloader extends CollecTorMain {
this.cachedTpfLines = null;
}
+  /** Load the URLs of previously downloaded .tpf files from the download
+   * history file, one URL per line.  Does nothing if that file does not
+   * exist, and continues with an empty history if it cannot be read. */
+  private void readDownloadedOnionPerfTpfFiles() {
+    File historyFile = this.onionPerfDownloadedFile;
+    if (historyFile.exists()) {
+      try (BufferedReader reader = new BufferedReader(
+          new FileReader(historyFile))) {
+        for (String url = reader.readLine(); url != null;
+            url = reader.readLine()) {
+          this.downloadedTpfFiles.add(url);
+        }
+      } catch (IOException e) {
+        logger.info("Unable to read download history file '"
+            + historyFile.getAbsolutePath() + "'. Ignoring "
+            + "download history and downloading all available .tpf files.");
+        /* Drop whatever was read before the failure. */
+        this.downloadedTpfFiles.clear();
+      }
+    }
+  }
+
+  /** Fetch the directory listing of the given OnionPerf host and process
+   * every .tpf file it links to, using the first dot-separated label of
+   * the host name as source name. */
+  private void downloadFromOnionPerfHost(URL baseUrl) {
+    logger.info("Downloading from OnionPerf host {}", baseUrl);
+    List<String> listedFileNames =
+        this.downloadOnionPerfDirectoryListing(baseUrl);
+    String hostName = baseUrl.getHost();
+    String[] hostNameLabels = hostName.split("\\.");
+    String sourceName = hostNameLabels[0];
+    for (String listedFileName : listedFileNames) {
+      this.downloadAndParseOnionPerfTpfFile(baseUrl, sourceName,
+          listedFileName);
+    }
+  }
+
+  /** Pattern for links to .tpf files contained in directory listings; the
+   * only capturing group is the link target. */
+  private static final Pattern TPF_FILE_URL_PATTERN =
+      Pattern.compile("<a href=\"([^\"]+\\.tpf)\">");
+
+  /** Download the directory listing found at the given base URL and
+   * extract the targets of all links to .tpf files, except for targets
+   * starting with a slash.
+   *
+   * @param baseUrl Base URL of the OnionPerf host to fetch the listing
+   *     from.
+   * @return Link targets in the order in which they were found, or an
+   *     empty list if the listing could not be downloaded completely. */
+  private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) {
+    List<String> tpfFileUrls = new ArrayList<>();
+    try (BufferedReader br = new BufferedReader(new InputStreamReader(
+        baseUrl.openStream()))) {
+      String line;
+      while ((line = br.readLine()) != null) {
+        /* Use find() in a loop rather than matches(), so that all links
+         * are extracted even if a listing puts several of them on the same
+         * line. */
+        Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line);
+        while (matcher.find()) {
+          String linkTarget = matcher.group(1);
+          if (!linkTarget.startsWith("/")) {
+            tpfFileUrls.add(linkTarget);
+          }
+        }
+      }
+    } catch (IOException e) {
+      /* Pass the exception as last argument, so that the cause of the
+       * failure ends up in the logs. */
+      logger.warn("Unable to download directory listing from '{}'. Skipping "
+          + "this OnionPerf host.", baseUrl, e);
+      /* Do not process a listing that may be incomplete. */
+      tpfFileUrls.clear();
+    }
+    return tpfFileUrls;
+  }
+
+  /** Strict (non-lenient) date format "yyyy-MM-dd" in UTC. */
+  private static final DateFormat DATE_FORMAT = createTpfDateFormat();
+
+  /** Create the date format instance assigned to DATE_FORMAT. */
+  private static DateFormat createTpfDateFormat() {
+    SimpleDateFormat utcDateFormat = new SimpleDateFormat("yyyy-MM-dd");
+    utcDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    utcDateFormat.setLenient(false);
+    return utcDateFormat;
+  }
+
+  /** Download a single .tpf file from an OnionPerf host, validate it, and
+   * store it in the archive and recent directories.
+   *
+   * <p>The file is skipped if it was downloaded before, if its name does
+   * not have the form source-filesize-yyyy-MM-dd.tpf, if it cannot be
+   * downloaded or parsed, or if it contains descriptors that do not match
+   * the source, file size, and date given in its name. Only files that
+   * were stored successfully are added to the download history.</p>
+   *
+   * @param baseUrl Base URL of the OnionPerf host.
+   * @param source Source name that the file name must start with and that
+   *     all contained Torperf results must report.
+   * @param tpfFileName Link target of the .tpf file as found in the
+   *     directory listing, relative to the base URL. */
+  private void downloadAndParseOnionPerfTpfFile(URL baseUrl, String source,
+      String tpfFileName) {
+    URL tpfFileUrl;
+    try {
+      tpfFileUrl = new URL(baseUrl, tpfFileName);
+    } catch (MalformedURLException e1) {
+      logger.warn("Unable to put together base URL '{}' and .tpf file path "
+          + "'{}' to a URL. Skipping.", baseUrl, tpfFileName);
+      return;
+    }
+
+    /* Skip if we successfully downloaded this file before. */
+    if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) {
+      return;
+    }
+
+    /* Verify file name before downloading: source-filesize-yyyy-MM-dd.tpf
+     * The name comes from a remote directory listing and is used for
+     * putting together local file paths below, so also reject any name
+     * containing a path separator. */
+    String[] tpfFileNameParts = tpfFileName.split("-");
+    if (!tpfFileName.startsWith(source + "-")
+        || tpfFileName.contains("/")
+        || tpfFileName.contains("\\")
+        || tpfFileName.length() < "s-f-yyyy-MM-dd".length()
+        || tpfFileNameParts.length < 5) {
+      logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl,
+          tpfFileName);
+      return;
+    }
+    int fileSize = 0;
+    String date = null;
+    try {
+      /* The file size is the fourth-last dash-separated part, followed by
+       * year, month, and day plus file extension. */
+      fileSize = Integer.parseInt(
+          tpfFileNameParts[tpfFileNameParts.length - 4]);
+      /* The date is made up of the ten characters preceding ".tpf". */
+      date = tpfFileName.substring(tpfFileName.length() - 14,
+          tpfFileName.length() - 4);
+      DATE_FORMAT.parse(date);
+    } catch (NumberFormatException | ParseException e) {
+      logger.warn("Invalid .tpf file name '{}{}'. Skipping.", baseUrl,
+          tpfFileName, e);
+      return;
+    }
+
+    /* Download file contents to temporary file.  Use the same URL that was
+     * looked up in the download history above, so that the download and
+     * the history entry always refer to the same resource. */
+    File tempFile = new File(this.recentDirectory, "." + tpfFileName);
+    tempFile.getParentFile().mkdirs();
+    try (InputStream is = tpfFileUrl.openStream()) {
+      Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      logger.warn("Unable to download '{}{}' to temporary file '{}'. "
+          + "Skipping.", baseUrl, tpfFileName, tempFile, e);
+      /* Do not leave a partially downloaded file behind. */
+      tempFile.delete();
+      return;
+    }
+
+    /* Validate contained descriptors. */
+    DescriptorParser descriptorParser =
+        DescriptorSourceFactory.createDescriptorParser();
+    List<Descriptor> descriptors;
+    try {
+      descriptors = descriptorParser.parseDescriptors(
+          Files.readAllBytes(tempFile.toPath()), tpfFileName);
+    } catch (IOException | DescriptorParseException e) {
+      logger.warn("OnionPerf file '{}{}' could not be parsed. "
+          + "Skipping.", baseUrl, tpfFileName, e);
+      tempFile.delete();
+      return;
+    }
+    String message = null;
+    for (Descriptor descriptor : descriptors) {
+      if (!(descriptor instanceof TorperfResult)) {
+        message = "File contains descriptors other than Torperf results.";
+        break;
+      }
+      TorperfResult torperf = (TorperfResult) descriptor;
+      if (!source.equals(torperf.getSource())) {
+        message = "File contains Torperf result from another source.";
+        break;
+      }
+      if (fileSize != torperf.getFileSize()) {
+        message = "File contains Torperf result from another file size.";
+        break;
+      }
+      if (!date.equals(DATE_FORMAT.format(torperf.getStartMillis()))) {
+        message = "File contains Torperf result from another date.";
+        break;
+      }
+    }
+    if (null != message) {
+      logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. "
+          + "Skipping.", baseUrl, tpfFileName, message);
+      tempFile.delete();
+      return;
+    }
+
+    /* Copy/move files in place. */
+    File archiveFile = new File(this.archiveDirectory,
+        date.replaceAll("-", "/") + "/" + tpfFileName);
+    archiveFile.getParentFile().mkdirs();
+    try {
+      Files.copy(tempFile.toPath(), archiveFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.",
+          tempFile, archiveFile, e);
+      tempFile.delete();
+      return;
+    }
+    File recentFile = new File(this.recentDirectory, tpfFileName);
+    try {
+      Files.move(tempFile.toPath(), recentFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      /* Leave the file out of the download history, so that it will be
+       * downloaded again in the next execution. */
+      logger.warn("Unable to move OnionPerf file {} to {}. Skipping.",
+          tempFile, recentFile, e);
+      tempFile.delete();
+      return;
+    }
+
+    /* Add to download history to avoid downloading it again. */
+    this.downloadedTpfFiles.add(tpfFileUrl.toString());
+  }
+
+  /** Write the URLs of all .tpf files in the download history to the
+   * download history file, one URL per line, creating parent directories
+   * as needed.  A failure to write is logged and otherwise ignored. */
+  private void writeDownloadedOnionPerfTpfFiles() {
+    File historyFile = this.onionPerfDownloadedFile;
+    File historyDirectory = historyFile.getParentFile();
+    historyDirectory.mkdirs();
+    try (BufferedWriter writer = new BufferedWriter(
+        new FileWriter(historyFile))) {
+      for (String url : this.downloadedTpfFiles) {
+        writer.write(url);
+        writer.newLine();
+      }
+    } catch (IOException e) {
+      logger.warn("Unable to write download history file '"
+          + historyFile.getAbsolutePath() + "'. This may "
+          + "result in ignoring history and downloading all available .tpf "
+          + "files in the next execution.", e);
+    }
+  }
+
/** Delete all files from the rsync directory that have not been modified
* in the last three days. */
public void cleanUpRsyncDirectory() throws ConfigurationException {
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index 593d580..fb43495 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -150,3 +150,10 @@ TorperfHosts = torperf, http://torperf.torproject.org/
## available on a given host (multiple times lists can be given
## TorperfFiles = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata
TorperfFilesLines = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata, torperf 5242880 5mb.data 5mb.extradata
+
+## OnionPerf base URLs
+## Hosts must be configured to use the first subdomain part of the given URL as
+## source name, e.g., SOURCE=first for the first URL below, SOURCE=second for
+## the second, etc.:
+## OnionPerfHosts = http://first.torproject.org/, http://second.torproject.org/
+OnionPerfHosts = https://op-us.onionperf.torproject.net/
diff --git a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
index 287fb11..90065b0 100644
--- a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
@@ -40,7 +40,7 @@ public class ConfigurationTest {
public void testKeyCount() throws Exception {
assertEquals("The number of properties keys in enum Key changed."
+ "\n This test class should be adapted.",
- 44, Key.values().length);
+ 45, Key.values().length);
}
@Test()
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits