[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [onionoo/master] Download input descriptors from CollecTor via https.
commit 5902c9044483764bdab3e8b76c299c55d11e0577
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Tue Jun 3 07:30:26 2014 +0200
Download input descriptors from CollecTor via https.
We used to rsync descriptors, but that was only a good idea when source
and destination were on the same host. As weasel points out, rsync
provides no server authentication, so we don't know what we get.
Another advantage is that all necessary steps for the hourly execution are
now implemented in Java. This makes it much easier to move to a different
execution model than an hourly cronjob, should we decide to do so.
---
bin/update.sh | 1 -
src/org/torproject/onionoo/DescriptorSource.java | 212 +++++++++++++++++++++-
src/org/torproject/onionoo/Main.java | 3 +
3 files changed, 210 insertions(+), 6 deletions(-)
diff --git a/bin/update.sh b/bin/update.sh
index 9f1424f..71d54a8 100755
--- a/bin/update.sh
+++ b/bin/update.sh
@@ -1,4 +1,3 @@
#!/bin/bash
-rsync -az --delete --exclude 'relay-descriptors/votes' --exclude 'relay-descriptors/microdescs' --exclude 'relay-descriptors/server-descriptors' --exclude 'relay-descriptors/extra-infos' --exclude 'bridge-descriptors/server-descriptors' --exclude 'bridge-descriptors/extra-infos' --exclude 'torperf' metrics.torproject.org::metrics-recent in
ant run >> log
diff --git a/src/org/torproject/onionoo/DescriptorSource.java b/src/org/torproject/onionoo/DescriptorSource.java
index be17fc4..9fb9715 100644
--- a/src/org/torproject/onionoo/DescriptorSource.java
+++ b/src/org/torproject/onionoo/DescriptorSource.java
@@ -1,13 +1,20 @@
-/* Copyright 2013 The Tor Project
+/* Copyright 2013, 2014 The Tor Project
* See LICENSE for licensing information */
package org.torproject.onionoo;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -19,6 +26,7 @@ import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.zip.GZIPInputStream;
import org.torproject.descriptor.BridgeNetworkStatus;
import org.torproject.descriptor.BridgePoolAssignment;
@@ -63,6 +71,169 @@ enum DescriptorHistory {
BRIDGE_POOLASSIGN_HISTORY,
}
+/* Mirrors one directory of recent descriptors from CollecTor into the
+ * local "in" directory via https.  Intended call order:
+ * statLocalFiles(), fetchRemoteDirectory(), fetchRemoteFiles(),
+ * deleteOldLocalFiles(). */
+class DescriptorDownloader {
+
+  private final String protocolHostNameResourcePrefix =
+      "https://collector.torproject.org/recent/";
+
+  /* Directory below the resource prefix and below inDir, ending in a
+   * slash, or null if the descriptor type is unknown, in which case
+   * all operations do nothing and return 0. */
+  private String directory;
+
+  private final File inDir = new File("in");
+
+  public DescriptorDownloader(DescriptorType descriptorType) {
+    switch (descriptorType) {
+    case RELAY_CONSENSUSES:
+      this.directory = "relay-descriptors/consensuses/";
+      break;
+    case RELAY_SERVER_DESCRIPTORS:
+      this.directory = "relay-descriptors/server-descriptors/";
+      break;
+    case RELAY_EXTRA_INFOS:
+      this.directory = "relay-descriptors/extra-infos/";
+      break;
+    case EXIT_LISTS:
+      this.directory = "exit-lists/";
+      break;
+    case BRIDGE_STATUSES:
+      this.directory = "bridge-descriptors/statuses/";
+      break;
+    case BRIDGE_SERVER_DESCRIPTORS:
+      this.directory = "bridge-descriptors/server-descriptors/";
+      break;
+    case BRIDGE_EXTRA_INFOS:
+      this.directory = "bridge-descriptors/extra-infos/";
+      break;
+    case BRIDGE_POOL_ASSIGNMENTS:
+      this.directory = "bridge-pool-assignments/";
+      break;
+    default:
+      /* Leave directory unset, so that nothing is fetched or deleted
+       * for this type. */
+      System.err.println("Unknown descriptor type.");
+      return;
+    }
+  }
+
+  private SortedSet<String> localFiles = new TreeSet<String>();
+
+  /* Records the names of all entries in the local directory and
+   * returns the number of known local file names. */
+  public int statLocalFiles() {
+    if (this.directory == null) {
+      return 0;
+    }
+    File localDirectory = new File(this.inDir, this.directory);
+    /* listFiles() returns null if the directory does not exist or
+     * cannot be read. */
+    File[] files = localDirectory.listFiles();
+    if (files != null) {
+      for (File file : files) {
+        this.localFiles.add(file.getName());
+      }
+    }
+    return this.localFiles.size();
+  }
+
+  private SortedSet<String> remoteFiles = new TreeSet<String>();
+
+  /* Set only after the remote directory listing was fetched and parsed
+   * to the end.  Without a complete listing we must not delete local
+   * files, or a transient network error would wipe the local copy. */
+  private boolean remoteDirectoryComplete = false;
+
+  /* Fetches the remote directory listing, remembers the names of all
+   * files linked from it, and returns the number of known remote file
+   * names. */
+  public int fetchRemoteDirectory() {
+    if (this.directory == null) {
+      return 0;
+    }
+    String directoryUrl = this.protocolHostNameResourcePrefix
+        + this.directory;
+    BufferedReader br = null;
+    try {
+      URL u = new URL(directoryUrl);
+      HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+      huc.setRequestMethod("GET");
+      huc.connect();
+      if (huc.getResponseCode() != 200) {
+        System.err.println("Could not fetch " + directoryUrl
+            + ": " + huc.getResponseCode() + " "
+            + huc.getResponseMessage() + ".  Skipping.");
+        return 0;
+      }
+      br = new BufferedReader(new InputStreamReader(
+          huc.getInputStream()));
+      String line;
+      while ((line = br.readLine()) != null) {
+        String fileName = this.parseFileName(line);
+        if (fileName != null) {
+          this.remoteFiles.add(fileName);
+        }
+      }
+      this.remoteDirectoryComplete = true;
+    } catch (IOException e) {
+      System.err.println("Could not fetch or parse " + directoryUrl
+          + ".  Skipping.");
+    } finally {
+      if (br != null) {
+        try {
+          br.close();
+        } catch (IOException ignored) {
+          /* Nothing left to do for a reader we are done with. */
+        }
+      }
+    }
+    return this.remoteFiles.size();
+  }
+
+  /* Extracts the file name linked from one table row of the directory
+   * listing, or returns null if the line does not link to a file in
+   * this directory. */
+  private String parseFileName(String line) {
+    String hrefStart = "<a href=\"";
+    if (!line.trim().startsWith("<tr>") || !line.contains(hrefStart)) {
+      return null;
+    }
+    String linePart = line.substring(
+        line.indexOf(hrefStart) + hrefStart.length());
+    if (!linePart.contains("\"")) {
+      return null;
+    }
+    linePart = linePart.substring(0, linePart.indexOf("\""));
+    /* The name becomes part of a local path, so only accept plain file
+     * names.  This skips subdirectories (trailing slash) and anything
+     * that could point outside of the local directory. */
+    if (linePart.length() == 0 || linePart.contains("/")
+        || linePart.contains("\\") || linePart.equals(".")
+        || linePart.equals("..")) {
+      return null;
+    }
+    return linePart;
+  }
+
+  /* Downloads all remote files that are not yet available locally and
+   * returns the number of files that were stored successfully. */
+  public int fetchRemoteFiles() {
+    int fetchedFiles = 0;
+    if (this.directory == null) {
+      return fetchedFiles;
+    }
+    for (String remoteFile : this.remoteFiles) {
+      if (this.localFiles.contains(remoteFile)) {
+        continue;
+      }
+      String fileUrl = this.protocolHostNameResourcePrefix
+          + this.directory + remoteFile;
+      /* Write to a temporary file first, so that readers never see a
+       * partially downloaded descriptor file. */
+      File localTempFile = new File(this.inDir, this.directory
+          + remoteFile + ".tmp");
+      File localFile = new File(this.inDir, this.directory + remoteFile);
+      InputStream is = null;
+      BufferedOutputStream bos = null;
+      boolean stored = false;
+      try {
+        localFile.getParentFile().mkdirs();
+        URL u = new URL(fileUrl);
+        HttpURLConnection huc = (HttpURLConnection) u.openConnection();
+        huc.setRequestMethod("GET");
+        huc.addRequestProperty("Accept-Encoding", "gzip");
+        huc.connect();
+        if (huc.getResponseCode() != 200) {
+          System.err.println("Could not fetch " + fileUrl
+              + ": " + huc.getResponseCode() + " "
+              + huc.getResponseMessage() + ".  Skipping.");
+          continue;
+        }
+        long lastModified = huc.getHeaderFieldDate("Last-Modified", -1L);
+        /* Remember the raw stream before wrapping it, so that it gets
+         * closed even if reading the gzip header fails. */
+        is = huc.getInputStream();
+        if (huc.getContentEncoding() != null &&
+            huc.getContentEncoding().equalsIgnoreCase("gzip")) {
+          is = new GZIPInputStream(is);
+        }
+        is = new BufferedInputStream(is);
+        bos = new BufferedOutputStream(
+            new FileOutputStream(localTempFile));
+        int len;
+        byte[] data = new byte[1024];
+        while ((len = is.read(data, 0, data.length)) >= 0) {
+          bos.write(data, 0, len);
+        }
+        /* Close explicitly, because a failing flush means that the
+         * file is incomplete and must not be kept. */
+        bos.close();
+        bos = null;
+        if (!localTempFile.renameTo(localFile)) {
+          System.err.println("Could not rename " + localTempFile
+              + " to " + localFile + ".  Skipping.");
+          continue;
+        }
+        stored = true;
+        if (lastModified >= 0) {
+          localFile.setLastModified(lastModified);
+        }
+        fetchedFiles++;
+      } catch (IOException e) {
+        System.err.println("Could not fetch or store " + fileUrl
+            + ".  Skipping.");
+      } finally {
+        if (is != null) {
+          try {
+            is.close();
+          } catch (IOException ignored) {
+            /* Nothing left to do for a stream we are done with. */
+          }
+        }
+        if (bos != null) {
+          try {
+            bos.close();
+          } catch (IOException ignored) {
+            /* The temporary file is removed below anyway. */
+          }
+        }
+        if (!stored) {
+          /* Do not leave partial downloads behind. */
+          localTempFile.delete();
+        }
+      }
+    }
+    return fetchedFiles;
+  }
+
+  /* Deletes local files that are no longer listed remotely and returns
+   * the number of files that were actually deleted.  Does nothing
+   * unless the remote directory listing was fetched completely. */
+  public int deleteOldLocalFiles() {
+    int deletedFiles = 0;
+    if (this.directory == null || !this.remoteDirectoryComplete) {
+      return deletedFiles;
+    }
+    for (String localFile : this.localFiles) {
+      if (!this.remoteFiles.contains(localFile)) {
+        File oldFile = new File(this.inDir, this.directory + localFile);
+        if (oldFile.delete()) {
+          deletedFiles++;
+        } else {
+          System.err.println("Could not delete " + oldFile + ".");
+        }
+      }
+    }
+    return deletedFiles;
+  }
+}
+
class DescriptorQueue {
private File inDir;
@@ -112,22 +283,22 @@ class DescriptorQueue {
directoryName = "relay-descriptors/consensuses";
break;
case RELAY_SERVER_DESCRIPTORS:
- directoryName = "relay-descriptors/server-descriptors-cat";
+ directoryName = "relay-descriptors/server-descriptors";
maxDescriptorFilesInQueue = 1;
break;
case RELAY_EXTRA_INFOS:
- directoryName = "relay-descriptors/extra-infos-cat";
+ directoryName = "relay-descriptors/extra-infos";
maxDescriptorFilesInQueue = 1;
break;
case BRIDGE_STATUSES:
directoryName = "bridge-descriptors/statuses";
break;
case BRIDGE_SERVER_DESCRIPTORS:
- directoryName = "bridge-descriptors/server-descriptors-cat";
+ directoryName = "bridge-descriptors/server-descriptors";
maxDescriptorFilesInQueue = 1;
break;
case BRIDGE_EXTRA_INFOS:
- directoryName = "bridge-descriptors/extra-infos-cat";
+ directoryName = "bridge-descriptors/extra-infos";
maxDescriptorFilesInQueue = 1;
break;
case BRIDGE_POOL_ASSIGNMENTS:
@@ -327,6 +498,29 @@ public class DescriptorSource {
this.fingerprintListeners.get(descriptorType).add(listener);
}
+ /* Runs the download step once for every value of DescriptorType. */
+ public void downloadDescriptors() {
+   DescriptorType[] allTypes = DescriptorType.values();
+   for (int i = 0; i < allTypes.length; i++) {
+     this.downloadDescriptors(allTypes[i]);
+   }
+ }
+
+ /* Running totals summed over all descriptor types processed by
+  * downloadDescriptors(); reported by getStatsString(). */
+ private int localFilesBefore = 0, foundRemoteFiles = 0,
+ downloadedFiles = 0, deletedLocalFiles = 0;
+
+ /* Synchronizes the local files of a single descriptor type with the
+  * remote directory and adds the resulting file counts to the running
+  * totals.  Types without any registered descriptor or fingerprint
+  * listener are left alone. */
+ private void downloadDescriptors(DescriptorType descriptorType) {
+   boolean hasListener =
+       this.descriptorListeners.containsKey(descriptorType)
+       || this.fingerprintListeners.containsKey(descriptorType);
+   if (hasListener) {
+     DescriptorDownloader downloader =
+         new DescriptorDownloader(descriptorType);
+     int foundLocally = downloader.statLocalFiles();
+     this.localFilesBefore += foundLocally;
+     int foundRemotely = downloader.fetchRemoteDirectory();
+     this.foundRemoteFiles += foundRemotely;
+     int downloaded = downloader.fetchRemoteFiles();
+     this.downloadedFiles += downloaded;
+     int deleted = downloader.deleteOldLocalFiles();
+     this.deletedLocalFiles += deleted;
+   }
+ }
+
public void readDescriptors() {
/* Careful when changing the order of parsing descriptor types! The
* various status updaters may base assumptions on this order. */
@@ -445,6 +639,14 @@ public class DescriptorSource {
public String getStatsString() {
StringBuilder sb = new StringBuilder();
+ sb.append(" " + this.localFilesBefore + " descriptor files found "
+ + "locally\n");
+ sb.append(" " + this.foundRemoteFiles + " descriptor files found "
+ + "remotely\n");
+ sb.append(" " + this.downloadedFiles + " descriptor files "
+ + "downloaded from remote\n");
+ sb.append(" " + this.deletedLocalFiles + " descriptor files "
+ + "deleted locally\n");
sb.append(" " + this.descriptorQueues.size() + " descriptor "
+ "queues created\n");
int historySizeBefore = 0, historySizeAfter = 0;
diff --git a/src/org/torproject/onionoo/Main.java b/src/org/torproject/onionoo/Main.java
index cceef92..347701c 100644
--- a/src/org/torproject/onionoo/Main.java
+++ b/src/org/torproject/onionoo/Main.java
@@ -58,6 +58,9 @@ public class Main {
DocumentWriter[] dws = new DocumentWriter[] { ddw, bdw, wdw, cdw,
udw };
+ Logger.printStatus("Downloading descriptors.");
+ dso.downloadDescriptors();
+
Logger.printStatus("Reading descriptors.");
dso.readDescriptors();
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits