[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [collector/master] Download .tpf files from OnionPerf hosts.



commit 6dd06f3f298ffd3b64abfd28214944f9f3cc01a9
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date:   Wed Mar 8 16:26:24 2017 +0100

    Download .tpf files from OnionPerf hosts.
    
    Implements #21272.
---
 CHANGELOG.md                                       |   3 +
 .../java/org/torproject/collector/conf/Key.java    |   3 +-
 .../collector/torperf/TorperfDownloader.java       | 237 ++++++++++++++++++++-
 src/main/resources/collector.properties            |   7 +
 .../collector/conf/ConfigurationTest.java          |   2 +-
 5 files changed, 249 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2365447..5e1107f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Changes in version 1.?.? - 2017-??-??
 
+ * Major changes
+   - Download .tpf files from OnionPerf hosts.
+
  * Medium changes
    - Clean up files in recent/exit-lists/ again.
 
diff --git a/src/main/java/org/torproject/collector/conf/Key.java b/src/main/java/org/torproject/collector/conf/Key.java
index 0274c98..dd35322 100644
--- a/src/main/java/org/torproject/collector/conf/Key.java
+++ b/src/main/java/org/torproject/collector/conf/Key.java
@@ -57,7 +57,8 @@ public enum Key {
   ReplaceIpAddressesWithHashes(Boolean.class),
   BridgeDescriptorMappingsLimit(Integer.class),
   TorperfFilesLines(String[].class),
-  TorperfHosts(String[][].class);
+  TorperfHosts(String[][].class),
+  OnionPerfHosts(URL[].class);
 
   private Class clazz;
   private static Set<String> keys;
diff --git a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
index b09a6d6..2cd99df 100644
--- a/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
+++ b/src/main/java/org/torproject/collector/torperf/TorperfDownloader.java
@@ -8,6 +8,11 @@ import org.torproject.collector.conf.Configuration;
 import org.torproject.collector.conf.ConfigurationException;
 import org.torproject.collector.conf.Key;
 import org.torproject.collector.cron.CollecTorMain;
+import org.torproject.descriptor.Descriptor;
+import org.torproject.descriptor.DescriptorParseException;
+import org.torproject.descriptor.DescriptorParser;
+import org.torproject.descriptor.DescriptorSourceFactory;
+import org.torproject.descriptor.TorperfResult;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -18,21 +23,34 @@ import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+import java.text.DateFormat;
+import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
+import java.util.SortedSet;
 import java.util.Stack;
 import java.util.TimeZone;
 import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /* Download possibly truncated Torperf .data and .extradata files from
  * configured sources, append them to the files we already have, and merge
- * the two files into the .tpf format. */
+ * the two files into the .tpf format;
+ * also download .tpf files from OnionPerf hosts. */
 public class TorperfDownloader extends CollecTorMain {
 
   private static final Logger logger = LoggerFactory.getLogger(
@@ -50,6 +68,24 @@ public class TorperfDownloader extends CollecTorMain {
   private SimpleDateFormat dateFormat;
   private File torperfLastMergedFile;
 
+  /** File containing the download history.  OnionPerf hosts do not delete
+   * older .tpf files, so we need this history to be able to delete our own
+   * copies without downloading them again. */
+  private File onionPerfDownloadedFile;
+
+  /** Full URLs of .tpf files downloaded in the current or in past
+   * executions. */
+  private SortedSet<String> downloadedTpfFiles = new TreeSet<>();
+
+  /** Base URLs of configured OnionPerf hosts. */
+  private URL[] onionPerfHosts = null;
+
+  /** Directory for storing archived .tpf files. */
+  private File archiveDirectory = null;
+
+  /** Directory for storing recent .tpf files. */
+  private File recentDirectory = null;
+
   @Override
   public String module() {
     return TORPERF;
@@ -81,6 +117,20 @@ public class TorperfDownloader extends CollecTorMain {
     }
     this.writeLastMergedTimestamps();
 
+    this.onionPerfDownloadedFile =
+        new File(config.getPath(Key.StatsPath).toFile(),
+        "onionperf-downloaded");
+    this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts);
+    this.readDownloadedOnionPerfTpfFiles();
+    this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(),
+        TORPERF);
+    this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(),
+        TORPERF);
+    for (URL baseUrl : this.onionPerfHosts) {
+      this.downloadFromOnionPerfHost(baseUrl);
+    }
+    this.writeDownloadedOnionPerfTpfFiles();
+
     this.cleanUpRsyncDirectory();
   }
 
@@ -617,6 +667,191 @@ public class TorperfDownloader extends CollecTorMain {
     this.cachedTpfLines = null;
   }
 
+  /** Loads the download history, one .tpf file URL per line, into the set
+   * of downloaded .tpf files.  A missing history file leaves the set
+   * untouched; an unreadable one empties it. */
+  private void readDownloadedOnionPerfTpfFiles() {
+    File historyFile = this.onionPerfDownloadedFile;
+    if (historyFile.exists()) {
+      try (BufferedReader reader = new BufferedReader(
+          new FileReader(historyFile))) {
+        for (String url = reader.readLine(); url != null;
+            url = reader.readLine()) {
+          this.downloadedTpfFiles.add(url);
+        }
+      } catch (IOException e) {
+        /* Best effort: without a history, all listed files are considered
+         * new in this execution. */
+        logger.info("Unable to read download history file '"
+            + historyFile.getAbsolutePath() + "'.  Ignoring "
+            + "download history and downloading all available .tpf files.");
+        this.downloadedTpfFiles.clear();
+      }
+    }
+  }
+
+  /** Fetches the directory listing of a single OnionPerf host and processes
+   * every .tpf file linked from it.  The part of the host name before the
+   * first dot is passed on as the source name. */
+  private void downloadFromOnionPerfHost(URL baseUrl) {
+    logger.info("Downloading from OnionPerf host {}", baseUrl);
+    /* Fetch the listing first, then derive the source name. */
+    List<String> listedFileNames =
+        this.downloadOnionPerfDirectoryListing(baseUrl);
+    String[] hostLabels = baseUrl.getHost().split("\\.");
+    String sourceName = hostLabels[0];
+    for (String listedFileName : listedFileNames) {
+      this.downloadAndParseOnionPerfTpfFile(baseUrl, sourceName,
+          listedFileName);
+    }
+  }
+
+  /** Matches a directory listing line containing a link to a .tpf file and
+   * captures the link target. */
+  private static final Pattern TPF_FILE_URL_PATTERN =
+      Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*");
+
+  /** Reads the directory listing at the given base URL line by line and
+   * returns the targets of all .tpf links that do not start with a slash.
+   * Returns an empty list if the listing cannot be read completely. */
+  private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) {
+    List<String> linkTargets = new ArrayList<>();
+    try (BufferedReader listing = new BufferedReader(
+        new InputStreamReader(baseUrl.openStream()))) {
+      for (String row = listing.readLine(); row != null;
+          row = listing.readLine()) {
+        Matcher link = TPF_FILE_URL_PATTERN.matcher(row);
+        if (!link.matches()) {
+          continue;
+        }
+        String target = link.group(1);
+        if (target.startsWith("/")) {
+          /* Skip links given as absolute paths. */
+          continue;
+        }
+        linkTargets.add(target);
+      }
+    } catch (IOException e) {
+      logger.warn("Unable to download directory listing from '{}'.  Skipping "
+          + "this OnionPerf host.", baseUrl);
+      /* Discard anything read before the failure. */
+      linkTargets.clear();
+    }
+    return linkTargets;
+  }
+
+  /** Non-lenient UTC formatter for yyyy-MM-dd dates, used to check the date
+   * part of .tpf file names and the start times of contained results.
+   * NOTE(review): SimpleDateFormat instances are not thread-safe; confirm
+   * that this class is never used from more than one thread at a time. */
+  private static final DateFormat DATE_FORMAT =
+      new SimpleDateFormat("yyyy-MM-dd");
+
+  static {
+    DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
+    DATE_FORMAT.setLenient(false);
+  }
+
+  /** Downloads a single .tpf file from an OnionPerf host, validates it, and
+   * stores it in the archive and recent directories.
+   *
+   * <p>The file is skipped if it is already contained in the download
+   * history, if its name does not have the form
+   * source-filesize-yyyy-MM-dd.tpf, if it cannot be downloaded or parsed, or
+   * if it contains anything other than Torperf results matching the source,
+   * file size, and date given in its name.  Only files that were stored in
+   * both directories are added to the download history.</p>
+   *
+   * @param baseUrl Base URL of the OnionPerf host.
+   * @param source Expected source name, which is also the expected prefix
+   *     of the file name.
+   * @param tpfFileName Link target taken from the host's directory listing.
+   */
+  private void downloadAndParseOnionPerfTpfFile(URL baseUrl, String source,
+      String tpfFileName) {
+    URL tpfFileUrl;
+    try {
+      tpfFileUrl = new URL(baseUrl, tpfFileName);
+    } catch (MalformedURLException e1) {
+      logger.warn("Unable to put together base URL '{}' and .tpf file path "
+          + "'{}' to a URL.  Skipping.", baseUrl, tpfFileName);
+      return;
+    }
+
+    /* Skip if we successfully downloaded this file before. */
+    if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) {
+      return;
+    }
+
+    /* Verify file name before downloading: source-filesize-yyyy-MM-dd.tpf
+     * The name comes from a remote directory listing and is used to build
+     * local file paths below, so it must not contain path separators. */
+    String[] tpfFileNameParts = tpfFileName.split("-");
+    if (!tpfFileName.startsWith(source + "-")
+        || tpfFileName.contains("/")
+        || tpfFileName.contains("\\")
+        || tpfFileName.length() < "s-f-yyyy-MM-dd".length()
+        || tpfFileNameParts.length < 5) {
+      logger.warn("Invalid .tpf file name '{}{}'.  Skipping.", baseUrl,
+          tpfFileName);
+      return;
+    }
+    int fileSize = 0;
+    String date = null;
+    try {
+      fileSize = Integer.parseInt(
+          tpfFileNameParts[tpfFileNameParts.length - 4]);
+      date = tpfFileName.substring(tpfFileName.length() - 14,
+          tpfFileName.length() - 4);
+      /* parse() may stop before the end of the string, so format the result
+       * again to make sure the date is written exactly as yyyy-MM-dd. */
+      if (!date.equals(DATE_FORMAT.format(DATE_FORMAT.parse(date)))) {
+        throw new ParseException("Unparseable date: \"" + date + "\"", 0);
+      }
+    } catch (NumberFormatException | ParseException e) {
+      logger.warn("Invalid .tpf file name '{}{}'.  Skipping.", baseUrl,
+          tpfFileName, e);
+      return;
+    }
+
+    /* Download file contents to temporary file, using the same resolved URL
+     * that is looked up in and added to the download history. */
+    File tempFile = new File(this.recentDirectory, "." + tpfFileName);
+    tempFile.getParentFile().mkdirs();
+    try (InputStream is = tpfFileUrl.openStream()) {
+      Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      logger.warn("Unable to download '{}{}' to temporary file '{}'.  "
+          + "Skipping.", baseUrl, tpfFileName, tempFile, e);
+      /* Do not leave a partially written temporary file behind. */
+      tempFile.delete();
+      return;
+    }
+
+    /* Validate contained descriptors. */
+    DescriptorParser descriptorParser =
+        DescriptorSourceFactory.createDescriptorParser();
+    List<Descriptor> descriptors;
+    try {
+      descriptors = descriptorParser.parseDescriptors(
+          Files.readAllBytes(tempFile.toPath()), tpfFileName);
+    } catch (IOException | DescriptorParseException e) {
+      logger.warn("OnionPerf file '{}{}' could not be parsed.  "
+          + "Skipping.", baseUrl, tpfFileName, e);
+      tempFile.delete();
+      return;
+    }
+    String message = null;
+    for (Descriptor descriptor : descriptors) {
+      if (!(descriptor instanceof TorperfResult)) {
+        message = "File contains descriptors other than Torperf results.";
+        break;
+      }
+      TorperfResult torperf = (TorperfResult) descriptor;
+      if (!source.equals(torperf.getSource())) {
+        message = "File contains Torperf result from another source.";
+        break;
+      }
+      if (fileSize != torperf.getFileSize()) {
+        message = "File contains Torperf result from another file size.";
+        break;
+      }
+      if (!date.equals(DATE_FORMAT.format(torperf.getStartMillis()))) {
+        message = "File contains Torperf result from another date.";
+        break;
+      }
+    }
+    if (null != message) {
+      logger.warn("OnionPerf file '{}{}' was found to be invalid: {}.  "
+          + "Skipping.", baseUrl, tpfFileName, message);
+      tempFile.delete();
+      return;
+    }
+
+    /* Copy/move files in place. */
+    File archiveFile = new File(this.archiveDirectory,
+         date.replaceAll("-", "/") + "/" + tpfFileName);
+    archiveFile.getParentFile().mkdirs();
+    try {
+      Files.copy(tempFile.toPath(), archiveFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      logger.warn("Unable to copy OnionPerf file {} to {}.  Skipping.",
+          tempFile, archiveFile, e);
+      tempFile.delete();
+      return;
+    }
+    File recentFile = new File(this.recentDirectory, tpfFileName);
+    try {
+      Files.move(tempFile.toPath(), recentFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING);
+    } catch (IOException e) {
+      /* Leave the file out of the download history, so that the next
+       * execution tries again. */
+      logger.warn("Unable to move OnionPerf file {} to {}.  Skipping.",
+          tempFile, recentFile, e);
+      tempFile.delete();
+      return;
+    }
+
+    /* Add to download history to avoid downloading it again.  This must be
+     * the same string that is looked up above. */
+    this.downloadedTpfFiles.add(tpfFileUrl.toString());
+  }
+
+  /** Writes the download history to disk, one URL per line, after creating
+   * the parent directory of the history file.  A failed write is logged
+   * and otherwise ignored. */
+  private void writeDownloadedOnionPerfTpfFiles() {
+    File historyFile = this.onionPerfDownloadedFile;
+    historyFile.getParentFile().mkdirs();
+    try (BufferedWriter writer = new BufferedWriter(
+        new FileWriter(historyFile))) {
+      for (String downloadedUrl : this.downloadedTpfFiles) {
+        writer.write(downloadedUrl);
+        writer.newLine();
+      }
+    } catch (IOException e) {
+      logger.warn("Unable to write download history file '"
+          + historyFile.getAbsolutePath() + "'.  This may "
+          + "result in ignoring history and downloading all available .tpf "
+          + "files in the next execution.", e);
+    }
+  }
+
   /** Delete all files from the rsync directory that have not been modified
    * in the last three days. */
   public void cleanUpRsyncDirectory() throws ConfigurationException {
diff --git a/src/main/resources/collector.properties b/src/main/resources/collector.properties
index 593d580..fb43495 100644
--- a/src/main/resources/collector.properties
+++ b/src/main/resources/collector.properties
@@ -150,3 +150,10 @@ TorperfHosts = torperf, http://torperf.torproject.org/
 ## available on a given host (multiple times lists can be given
 ## TorperfFiles = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata
 TorperfFilesLines = torperf 51200 50kb.data 50kb.extradata, torperf 1048576 1mb.data 1mb.extradata, torperf 5242880 5mb.data 5mb.extradata
+
+## OnionPerf base URLs
+## Hosts must be configured to use the first subdomain part of the given URL as
+## source name, e.g., SOURCE=first for the first URL below, SOURCE=second for
+## the second, etc.:
+## OnionPerfHosts = http://first.torproject.org/, http://second.torproject.org/
+OnionPerfHosts = https://op-us.onionperf.torproject.net/
diff --git a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
index 287fb11..90065b0 100644
--- a/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
+++ b/src/test/java/org/torproject/collector/conf/ConfigurationTest.java
@@ -40,7 +40,7 @@ public class ConfigurationTest {
   public void testKeyCount() throws Exception {
     assertEquals("The number of properties keys in enum Key changed."
         + "\n This test class should be adapted.",
-        44, Key.values().length);
+        45, Key.values().length);
   }
 
   @Test()



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits