[tor-commits] r24768: {} Only leave a note that the code moved to Git. (in projects/archives/trunk: . bridge-desc-sanitizer exonerator)
Author: kloesing
Date: 2011-05-18 20:15:43 +0000 (Wed, 18 May 2011)
New Revision: 24768
Added:
projects/archives/trunk/README
Removed:
projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
projects/archives/trunk/bridge-desc-sanitizer/HOWTO
projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
projects/archives/trunk/exonerator/ExoneraTor.java
projects/archives/trunk/exonerator/HOWTO
projects/archives/trunk/exonerator/LICENSE
projects/archives/trunk/exonerator/exonerator.py
Log:
Only leave a note that the code moved to Git.
Added: projects/archives/trunk/README
===================================================================
--- projects/archives/trunk/README (rev 0)
+++ projects/archives/trunk/README 2011-05-18 20:15:43 UTC (rev 24768)
@@ -0,0 +1,7 @@
+---------------------------------------------------------------------------
+
+ THIS REPOSITORY HAS MOVED TO GIT!
+
+ git clone git://git.torproject.org/metrics-utils/
+
+---------------------------------------------------------------------------
Deleted: projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,504 +0,0 @@
-import java.io.*;
-import java.util.*;
-import com.maxmind.geoip.*;
-import org.apache.commons.codec.digest.*;
-import org.apache.commons.codec.binary.*;
-
-public class ConvertBridgeDescs {
-
- public static void main(String[] args) throws Exception {
-
- /* If the following flag is set to true, don't write 127.0.0.1 for
- * bridge IP addresses, but put replace IP addresses with
- * H(IP address + bridge identity + secret)[:4] formatted as IP
- * address. An example for the hash input would be:
- * "12.34.56.78ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCDpassword"
- * (without quotes) */
- boolean hashIpAddresses = false;
- String secret = "password";
-
- long started = System.currentTimeMillis();
-
- if (args.length < 5) {
- System.err.println("Usage: java "
- + ConvertBridgeDescs.class.getSimpleName()
- + " <input directory> <geoip.txt file> <YYYY> <MM> "
- + "<output directory>");
- System.exit(1);
- }
- File inDir = new File(args[0]);
- File geoipFile = new File(args[1]);
- LookupService cl = new LookupService(geoipFile,
- LookupService.GEOIP_MEMORY_CACHE);
- Set<String> unresolved = new HashSet<String>();
- unresolved.add("--");
- unresolved.add("a1");
- unresolved.add("a2");
- unresolved.add("eu");
- unresolved.add("ap");
- String year = args[2];
- String month = args[3];
- int yearInt = Integer.parseInt(year);
- int monthInt = Integer.parseInt(month);
- File outDir = new File(args[4] + File.separator
- + "bridge-descriptors-" + year + "-" + month);
- outDir.mkdirs();
-
- SortedSet<File> statuses = new TreeSet<File>();
- Set<File> descriptors = new HashSet<File>();
- Set<File> extrainfos = new HashSet<File>();
-
- System.out.println("Checking files in " + inDir.getAbsolutePath()
- + "...");
- Stack<File> directoriesLeftToParse = new Stack<File>();
- directoriesLeftToParse.push(inDir);
- String currentYearAndMonth = "from-tonga-" + year + "-" + month;
- String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ?
- "" + (yearInt - 1) + "-12" :
- year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1));
- String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ?
- "" + (yearInt + 1) + "-01" :
- year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1));
- while (!directoriesLeftToParse.isEmpty()) {
- File directoryOrFile = directoriesLeftToParse.pop();
- String filename = directoryOrFile.getName();
- if (directoryOrFile.isDirectory()) {
- if (/* base directory */
- filename.equals(inDir.getName()) ||
- /* current month */
- filename.startsWith(currentYearAndMonth) ||
- /* last days of previous month */
- (filename.startsWith(previousYearAndMonth)
- && Integer.parseInt(filename.substring(19, 21)) > 24) ||
- /* first days of next month */
- (filename.startsWith(nextYearAndMonth)
- && Integer.parseInt(filename.substring(19, 21)) < 6)) {
- for (File fileInDir : directoryOrFile.listFiles()) {
- directoriesLeftToParse.push(fileInDir);
- }
- }
- continue;
- }
- if (filename.startsWith("cached-extrainfo")) {
- extrainfos.add(directoryOrFile);
- } else if (filename.equals("bridge-descriptors")) {
- descriptors.add(directoryOrFile);
- } else if (filename.equals("networkstatus-bridges")) {
- statuses.add(directoryOrFile);
- }
- }
-
- int days = ((extrainfos.size() / 2 + descriptors.size()
- + statuses.size()) + 3 * 24) / (3 * 48);
- System.out.println("Found " + extrainfos.size()
- + " cached-extrainfo[.new] files, " + descriptors.size()
- + " bridge-descriptors files, and " + statuses.size()
- + " networkstatus-bridges files, covering approximately " + days
- + " days.");
-
- System.out.print("Parsing server descriptors to find out country "
- + "codes of bridges in extra-info descriptors");
- Map<String, String> bridgeCountries = new HashMap<String, String>();
- int parsed = 0;
- for (File file : descriptors) {
- if (parsed++ > descriptors.size() / days) {
- System.out.print(".");
- parsed = 0;
- }
- BufferedReader br = new BufferedReader(new FileReader(file));
- String line = null, routerLine = null;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("router ")) {
- routerLine = line;
- } else if (line.startsWith("opt extra-info-digest ")) {
- String extraInfoDigest = line.split(" ")[2];
- String countryCode = cl.getCountry(routerLine.split(" ")[2]).
- getCode();
- if (bridgeCountries.containsKey(extraInfoDigest) &&
- !bridgeCountries.get(extraInfoDigest).
- equals(countryCode)) {
- System.out.println("Mapping already contains extra-info "
- + "digest " + extraInfoDigest + " with different "
- + "country. Exiting.");
- System.exit(1);
- }
- bridgeCountries.put(extraInfoDigest, countryCode);
- }
- }
- }
- System.out.println("Mapping contains " + bridgeCountries.size()
- + " entries.");
-
- System.out.print("Parsing extra-info descriptors");
- String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7",
- "8", "9", "a", "b", "c", "d", "e", "f" };
- for (String x : hex)
- for (String y : hex)
- new File(outDir + File.separator + "extra-infos" + File.separator
- + x + File.separator + y).mkdirs();
- int writtenExtrainfos = 0;
- Map<String, String> extrainfoMapping = new HashMap<String, String>();
- parsed = 0;
- for (File file : extrainfos) {
- if (parsed++ > extrainfos.size() / days) {
- System.out.print(".");
- parsed = 0;
- }
- FileInputStream fis = new FileInputStream(file);
- BufferedInputStream bis = new BufferedInputStream(fis);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- int startDescriptorIndex = -1, endDescriptorIndex = -1;
- String asciiString = new String(allData, "US-ASCII");
- BufferedReader br = new BufferedReader(new StringReader(
- asciiString));
- String line = null;
- StringBuilder scrubbed = null;
- boolean skipSignature = false;
- boolean skipDescriptor = false;
- while ((line = br.readLine()) != null) {
- if (skipSignature && !line.equals("-----END SIGNATURE-----")) {
- continue;
- } else if (line.startsWith("extra-info ")) {
- endDescriptorIndex = startDescriptorIndex =
- asciiString.indexOf(line, startDescriptorIndex + 1);
- scrubbed = new StringBuilder(DigestUtils.shaHex(Hex.decodeHex(
- line.split(" ")[2].toCharArray())).toUpperCase() + "\n");
- } else if (line.startsWith("published ")
- || line.startsWith("write-history ")
- || line.startsWith("read-history ")
- || line.startsWith("geoip-start-time ")
- || line.startsWith("geoip-client-origins ")
- || line.startsWith("bridge-stats-end ")
- || line.startsWith("bridge-ips ")) {
- scrubbed.append(line + "\n");
- } else if (line.startsWith("router-signature")) {
- if (skipDescriptor) {
- System.out.println("Skipping!");
- skipDescriptor = false;
- } else {
- endDescriptorIndex = asciiString.indexOf(line,
- endDescriptorIndex + 1) + line.length() + 1;
- byte[] forDigest = new byte[endDescriptorIndex -
- startDescriptorIndex];
- System.arraycopy(allData, startDescriptorIndex, forDigest, 0,
- endDescriptorIndex - startDescriptorIndex);
- String originalHash = DigestUtils.shaHex(forDigest);
- String countryCode = "ZZ";
- if (bridgeCountries.containsKey(originalHash.toUpperCase())) {
- countryCode = bridgeCountries.get(originalHash.toUpperCase());
- }
- String scrubbedDesc = "extra-info Unnamed" + countryCode + " "
- + scrubbed.toString();
- String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
- if (extrainfoMapping.containsKey(originalHash) &&
- !extrainfoMapping.get(originalHash).equals(scrubbedHash)) {
- System.out.println("We already have an extra-info mapping "
- + "from " + originalHash + " to "
- + extrainfoMapping.get(originalHash) + ", but we now "
- + "want to add a mapping to " + scrubbedHash
- + ". Exiting");
- System.exit(1);
- }
- extrainfoMapping.put(originalHash, scrubbedHash);
- File out = new File(outDir + File.separator + "extra-infos"
- + File.separator + scrubbedHash.charAt(0) + File.separator
- + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
- if (!out.exists()) {
- BufferedWriter bw = new BufferedWriter(new FileWriter(out));
- bw.write(scrubbedDesc);
- bw.close();
- writtenExtrainfos++;
- }
- }
- } else if (line.equals("-----BEGIN SIGNATURE-----")) {
- skipSignature = true;
- } else if (line.equals("-----END SIGNATURE-----")) {
- skipSignature = false;
- } else if (line.startsWith("dirreq-") || line.startsWith("cell-")
- || line.startsWith("exit-")) {
- continue;
- } else {
- System.out.println("Unrecognized line '" + line + "'. Skipping");
- skipDescriptor = true;
- }
- }
- br.close();
- }
- System.out.println("\nWrote " + writtenExtrainfos
- + " extra-info descriptors.");
-
- System.out.print("Parsing server descriptors");
- for (String x : hex)
- for (String y : hex)
- new File(outDir + File.separator + "server-descriptors"
- + File.separator + x + File.separator + y).mkdirs();
- int writtenDescriptors = 0;
- Map<String, String> descriptorMapping = new HashMap<String, String>();
- int found = 0, notfound = 0;
- parsed = 0;
- String haveExtraInfo = null;
- for (File file : descriptors) {
- if (parsed++ > descriptors.size() / days) {
- System.out.print(".");
- parsed = 0;
- }
- FileInputStream fis = new FileInputStream(file);
- BufferedInputStream bis = new BufferedInputStream(fis);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- int len;
- byte[] data = new byte[1024];
- while ((len = bis.read(data, 0, 1024)) >= 0) {
- baos.write(data, 0, len);
- }
- bis.close();
- byte[] allData = baos.toByteArray();
- int startDescriptorIndex = -1, endDescriptorIndex = -1;
- String asciiString = new String(allData, "US-ASCII");
- BufferedReader br = new BufferedReader(new StringReader(
- asciiString));
- String line = null, country = null, originalAddress = null,
- ipAddress = "127.0.0.1", routerLinePartOne = null,
- routerLinePartTwo = null;
- StringBuilder scrubbed = null;
- boolean skipCrypto = false, contactWritten = false;
- while ((line = br.readLine()) != null) {
- if (skipCrypto && !line.startsWith("-----END ")) {
- continue;
- } else if (line.startsWith("router ")) {
- endDescriptorIndex = startDescriptorIndex =
- asciiString.indexOf(line, startDescriptorIndex + 1);
- country = cl.getCountry(line.split(" ")[2]).getCode().
- toLowerCase();
- if (unresolved.contains(country)) {
- country = "zz";
- }
- originalAddress = line.split(" ")[2];
- scrubbed = new StringBuilder();
- routerLinePartOne = "router Unnamed" + country.toUpperCase();
- routerLinePartTwo = line.split(" ")[3] + " "
- + line.split(" ")[4] + " " + line.split(" ")[5] + "\n";
- contactWritten = false;
- haveExtraInfo = null;
- } else if (line.startsWith("opt fingerprint ")) {
- scrubbed.append("opt fingerprint");
- String fingerprint = DigestUtils.shaHex(Hex.decodeHex(
- line.substring(16).replaceAll(" ", "").toCharArray())).
- toUpperCase();
- for (int i = 0; i < fingerprint.length() / 4; i++)
- scrubbed.append(" " + fingerprint.substring(4 * i,
- 4 * (i + 1)));
- scrubbed.append("\n");
- if (hashIpAddresses) {
- byte[] hashedOctets = DigestUtils.sha(originalAddress
- + line.substring(16).replaceAll(" ", "") + secret);
- String hashedIp = "";
- for (int i = 0; i < 4; i++) {
- hashedIp += "." + ((int) hashedOctets[i] + 256) % 256;
- }
- ipAddress = hashedIp.substring(1);
- }
- } else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody at example dot " + country
- + "\n");
- contactWritten = true;
- } else if (line.startsWith("router-signature")) {
- endDescriptorIndex = asciiString.indexOf(line,
- endDescriptorIndex + 1) + line.length() + 1;
- byte[] forDigest = new byte[endDescriptorIndex -
- startDescriptorIndex];
- System.arraycopy(allData, startDescriptorIndex, forDigest, 0,
- endDescriptorIndex - startDescriptorIndex);
- String originalHash = DigestUtils.shaHex(forDigest);
- String scrubbedDesc = routerLinePartOne + " " + ipAddress
- + " " + routerLinePartTwo + scrubbed.toString();
- String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
- if (descriptorMapping.containsKey(originalHash) &&
- !descriptorMapping.get(originalHash).equals(scrubbedHash)) {
- System.out.println("We already have a descriptor mapping "
- + "from " + originalHash + " to "
- + descriptorMapping.get(originalHash) + ", but we now "
- + "want to add a mapping to " + scrubbedHash
- + ". Exiting");
- System.exit(1);
- }
- descriptorMapping.put(originalHash, scrubbedHash);
- if (haveExtraInfo != null) {
- File out = new File(outDir + File.separator
- + "server-descriptors" + File.separator
- + scrubbedHash.charAt(0) + File.separator
- + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
- if (!out.exists()) {
- BufferedWriter bw2 = new BufferedWriter(new FileWriter(out));
- bw2.write(scrubbedDesc);
- bw2.close();
- writtenDescriptors++;
- }
- }
- } else if (line.startsWith("opt extra-info-digest ")) {
- String originalExtraInfo = line.split(" ")[2].toLowerCase();
- if (!extrainfoMapping.containsKey(originalExtraInfo)) {
- notfound++;
- haveExtraInfo = "0000000000000000000000000000000000000000";
- } else {
- found++;
- haveExtraInfo = extrainfoMapping.get(originalExtraInfo).
- toUpperCase();
- }
- scrubbed.append("opt extra-info-digest " + haveExtraInfo
- + "\n");
- } else if (line.startsWith("reject ")
- || line.startsWith("accept ")) {
- if (!contactWritten) {
- scrubbed.append("contact nobody at example dot " + country
- + "\n");
- contactWritten = true;
- }
- scrubbed.append(line + "\n");
- } else if (line.startsWith("platform ")
- || line.startsWith("opt protocols ")
- || line.startsWith("published ")
- || line.startsWith("uptime ")
- || line.startsWith("bandwidth ")
- || line.startsWith("opt hibernating ")
- || line.equals("opt hidden-service-dir")
- || line.equals("opt caches-extra-info")
- || line.equals("opt allow-single-hop-exits")) {
- scrubbed.append(line + "\n");
- } else if (line.startsWith("family ")) {
- StringBuilder familyLine = new StringBuilder("family");
- for (String s : line.substring(7).split(" ")) {
- if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(
- s.substring(1).toCharArray())).toUpperCase());
- } else {
- familyLine.append(" Unnamed");
- }
- }
- scrubbed.append(familyLine.toString() + "\n");
- } else if (line.startsWith("@purpose ")) {
- continue;
- } else if (line.startsWith("-----BEGIN ")
- || line.equals("onion-key") || line.equals("signing-key")) {
- skipCrypto = true;
- } else if (line.startsWith("-----END ")) {
- skipCrypto = false;
- } else {
- System.out.println("Unrecognized line '" + line + "'. Exiting");
- System.exit(1);
- }
- }
- br.close();
- }
- System.out.println("\nWrote " + writtenDescriptors
- + " bridge descriptors. While parsing, we found that we parsed "
- + found + " extra-info identifiers before, but are missing "
- + notfound + ". (The number of missing identifiers should be "
- + "significantly smaller.)");
-
- System.out.print("Parsing network statuses");
- parsed = notfound = found = 0;
- for (File file : statuses) {
- if (parsed++ > statuses.size() / days) {
- System.out.print(".");
- parsed = 0;
- }
- if (!file.getParent().substring(file.getParent().
- indexOf("from-tonga-")).startsWith(currentYearAndMonth)) {
- continue;
- }
- BufferedReader br = new BufferedReader(new FileReader(file));
- String line = null;
- StringBuilder scrubbed = new StringBuilder();
- while ((line = br.readLine()) != null) {
- if (line.startsWith("r ")) {
- String[] parts = line.split(" ");
- String bridgeIdentity = parts[2] + "==";
- String hashedBridgeIdentity = Base64.encodeBase64String(
- DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
- substring(0, 27);
- String descIdentifier = parts[3] + "==";
- String hexDescIdentifier = Hex.encodeHexString(
- Base64.decodeBase64(descIdentifier));
- String replacementDescIdentifier = null;
- if (!descriptorMapping.containsKey(hexDescIdentifier)) {
- notfound++;
- replacementDescIdentifier = "AAAAAAAAAAAAAAAAAAAAAAAAAAA";
- } else {
- found++;
- String refDesc = descriptorMapping.get(hexDescIdentifier).
- toLowerCase();
- File descriptorFile = new File(outDir + File.separator
- + "server-descriptors" + File.separator
- + refDesc.charAt(0) + File.separator + refDesc.charAt(1)
- + File.separator + refDesc);
- if (!descriptorFile.exists()) {
- System.out.println("Descriptor file '"
- + descriptorFile.getAbsolutePath() + "' does not exist.");
- System.exit(1);
- }
- replacementDescIdentifier = Base64.encodeBase64String(
- Hex.decodeHex(descriptorMapping.get(hexDescIdentifier).
- toCharArray())).substring(0, 27);
- }
- String country = cl.getCountry(parts[6]).getCode().
- toLowerCase();
- if (unresolved.contains(country)) {
- country = "zz";
- }
- String ipAddress = "127.0.0.1";
- if (hashIpAddresses) {
- byte[] hashedOctets = DigestUtils.sha(parts[6]
- + Hex.encodeHexString(Base64.decodeBase64(
- bridgeIdentity)).toUpperCase() + secret);
- String hashedIp = "";
- for (int i = 0; i < 4; i++) {
- hashedIp += "." + ((int) hashedOctets[i] + 256) % 256;
- }
- ipAddress = hashedIp.substring(1);
- }
- scrubbed.append("r Unnamed" + country.toUpperCase() + " "
- + hashedBridgeIdentity
- + " " + replacementDescIdentifier + " " + parts[4] + " "
- + parts[5] + " " + ipAddress + " " + parts[7] + " "
- + parts[8] + "\n");
- } else if (line.startsWith("s ")) {
- scrubbed.append(line + "\n");
- } else {
- System.out.println("Unknown line: " + line);
- System.exit(1);
- }
- }
- String timeString = file.getParent().substring(file.getParent().
- indexOf("from-tonga-") + 11);
- String[] date = timeString.substring(0, 10).split("-");
- String time = timeString.substring(11, 17);
- File dir = new File(outDir + File.separator + "statuses"
- + File.separator + date[2] + File.separator);
- dir.mkdirs();
- File out = new File(dir.getAbsolutePath() + File.separator + date[0]
- + date[1] + date[2] + "-" + time + "-"
- + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
- if (!out.exists()) {
- BufferedWriter bw3 = new BufferedWriter(new FileWriter(out));
- bw3.write(scrubbed.toString());
- bw3.close();
- }
- }
- System.out.println("\nWhile parsing, we found that we parsed "
- + found + " bridge descriptors before, but are missing "
- + notfound + ". (The number of missing identifiers should be "
- + "significantly smaller.)");
-
- long finished = System.currentTimeMillis();
- System.out.println("Processing took " + ((finished - started) / 1000)
- + " seconds.");
- }
-}
-
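
The comment at the top of the deleted ConvertBridgeDescs.java describes an optional mode that replaces a bridge's IP address with the first four bytes of SHA-1(IP address + bridge identity + secret), formatted as an IP address. The following stand-alone sketch is not part of the repository; the class name is made up and the inputs are the example values from that comment. It assumes Apache Commons Codec on the classpath:

  import org.apache.commons.codec.digest.DigestUtils;

  public class HashIpExample {
    public static void main(String[] args) {
      /* Example values from the comment in ConvertBridgeDescs.java:
       * IP address, 40-character bridge identity fingerprint, secret. */
      String input = "12.34.56.78"
          + "ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD" + "password";
      /* SHA-1 over the concatenated string; the first four bytes become
       * the octets of the sanitized address. */
      byte[] digest = DigestUtils.sha(input);
      StringBuilder hashedIp = new StringBuilder();
      for (int i = 0; i < 4; i++) {
        if (i > 0) {
          hashedIp.append(".");
        }
        hashedIp.append((digest[i] + 256) % 256);
      }
      System.out.println(hashedIp);
    }
  }
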
Deleted: projects/archives/trunk/bridge-desc-sanitizer/HOWTO
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/HOWTO 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/HOWTO 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,138 +0,0 @@
-Bridge descriptor sanitizer
-
----------------------------------------------------------------------------
-
- THIS REPOSITORY HAS MOVED TO GIT!
-
- git clone git://git.torproject.org/metrics-utils/
-
----------------------------------------------------------------------------
-
-Introduction:
-
-The bridge authority Tonga maintains a list of bridges in order to serve
-bridge addresses and descriptors to its clients. Every half hour, Tonga
-takes a snapshot of the known bridge descriptors and copies them to
-byblos for later statistical analysis. As a guiding principle, the Tor
-project makes all data that it uses for statistical analysis available to
-the interested public, in order to maximize transparency towards the
-community. However, the bridge descriptors contain the IP addresses and
-other contact information of bridges that must not be made public, or the
-purpose of bridges as non-public entry points into the Tor network would
-be defeated. This script takes the half-hourly snapshots as input, removes
-all potentially sensitive information from the descriptors, and writes out
-sanitized bridge descriptors that are safe to publish.
-
----------------------------------------------------------------------------
-
-Processing steps:
-
-The following steps are taken to remove all potentially sensitive
-information from the bridge descriptors while keeping them useful for
-statistical analysis.
-
-1. Replace the bridge identity with its SHA1 value
-
- Clients can request a bridge's current descriptor by sending its
- identity string to the bridge authority. This is a feature to make
- bridges on dynamic IP addresses useful. Therefore, the original
- identities (and anything that could be used to derive them) need to be
- removed from the descriptors. The bridge identity is replaced with its
- SHA1 hash value. The idea is to have a consistent replacement that
- remains stable over months or even years (without keeping a secret for a
- keyed hash function).
-
-2. Remove all cryptographic keys and signatures
-
- It would be straightforward to learn about the bridge identity from the
- bridge's public key. Replacing keys by newly generated ones seemed to be
- unnecessary (and would involve keeping a state over months/years), so
- that all cryptographic objects have simply been removed.
-
-3. Replace IP address with 127.0.0.1
-
- Of course, the IP address needs to be removed, too. However, the IP
- address is resolved to a country code first and the result written to
- the contact line as "somebody at example dot de" for Germany, etc. The
- ports are kept unchanged though.
-
-4. Replace contact information
-
- If there is contact information in a descriptor, the contact line is
- changed to "somebody at ...". If there is none, a contact line is added
- saying "nobody at ..." in order to put in the country code. If the
- bridge's IP address cannot be resolved to a country, the unassigned
- country code "zz" is written to the contact line.
-
-5. Replace nickname with UnnamedCC
-
- The bridge nickname might give hints on the location of the bridge if
- chosen without care; e.g., a bridge nickname might closely resemble the
- nicknames of the operator's relays, which might run on adjacent IP
- addresses. All bridge nicknames are therefore replaced with the string
- UnnamedCC, with CC being the upper-case country code.
-
-6. Replace references to descriptors
-
- Changing anything in the server descriptors or extra-info descriptors
- invalidates the references from network statuses or server descriptors,
- respectively. All references are replaced with the new hashes of
- referenced descriptors, if available. In case of missing descriptors,
- references are replaced with all zeros (or 'A's in base 64 encoding).
-
-Note that these processing steps only prevent people from learning about
-new bridge locations. People who already know a bridge identity or location
-can easily learn more about this bridge from the sanitized descriptors.
-This is useful for statistical analysis, e.g. to filter out bridges that
-have been running as relays before.
-
----------------------------------------------------------------------------
-
-Quick Start:
-
-The following steps are necessary to process the half-hourly snapshots as
-collected by moria:
-
-- Install Java 5 or higher.
-
-- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding
- from http://commons.apache.org/codec/ and place the .jar (in the
- following assumed to be commons-codec-1.4.jar) in the same directory as
- this HOWTO file.
-
-- Download MaxMind GeoIP Java library from http://geolite.maxmind.com/
- download/geoip/api/java/ and generate a JAR file as described in the
- README file. Place the resulting maxmindgeoip.jar in the same directory
- as this HOWTO file.
-
-- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz
- in a directory called data/ in the same directory as this HOWTO file.
-
-- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/
- to separate directories in the newly created subdirectory in/ .
-
-- Put the binary MaxMind GeoIP database file that shall be used for
- resolving IP addresses to country codes in the same directory as this
- HOWTO file. Either the free or the commercial version of the database
- can be used. For the archives provided by The Tor Project, the first
- available commercial version of the subsequent month is used.
-
-- Compile the Java class using
-
- $ javac -cp commons-codec-1.4.jar:maxmindgeoip.jar
- ConvertBridgeDescs.java
-
-- Run the script, providing it with the parameters it needs:
-
- java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs
- <input directory> <geoip database file> <YYYY> <MM>
- <output directory>
-
- Note that YYYY and MM specify the month that shall be processed. The other
- descriptors in the input directory are ignored.
-
- A sample invocation might be:
-
- $ java -cp .:commons-codec-1.4.jar:maxmindgeoip.jar ConvertBridgeDescs
- in/ GeoIP-106_20081101.dat 2008 10 out/
-
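
As an illustration of processing steps 1 and 5 from the HOWTO above, here is a stand-alone sketch that is not part of the repository; the class name, fingerprint, and country code are made-up example values, and Apache Commons Codec is assumed to be on the classpath. It replaces a bridge fingerprint with the SHA-1 hash of the identity it encodes and builds the UnnamedCC nickname:

  import org.apache.commons.codec.binary.Hex;
  import org.apache.commons.codec.digest.DigestUtils;

  public class SanitizeIdentityExample {
    public static void main(String[] args) throws Exception {
      /* Made-up example fingerprint and country code. */
      String fingerprint = "ABCDABCDABCDABCDABCDABCDABCDABCDABCDABCD";
      String countryCode = "de";
      /* Step 1: hash the 20-byte identity encoded by the fingerprint. */
      byte[] identity = Hex.decodeHex(fingerprint.toCharArray());
      String hashedFingerprint = DigestUtils.shaHex(identity).toUpperCase();
      /* Step 5: replace the nickname with UnnamedCC. */
      String nickname = "Unnamed" + countryCode.toUpperCase();
      System.out.println(nickname + " " + hashedFingerprint);
    }
  }
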
Deleted: projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,8 +0,0 @@
-#!/bin/bash
-mkdir "in/"
-for i in `ls data/ | cut -c 1-29`
-do
-mkdir "in/"$i
-tar -C "in/"$i -xf "data/"$i".tar.gz"
-done
-
Deleted: projects/archives/trunk/exonerator/ExoneraTor.java
===================================================================
--- projects/archives/trunk/exonerator/ExoneraTor.java 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/ExoneraTor.java 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,404 +0,0 @@
-/* Copyright 2009 The Tor Project
- * See LICENSE for licensing information */
-
-import java.io.*;
-import java.math.*;
-import java.text.*;
-import java.util.*;
-import org.bouncycastle.util.encoders.Base64;
-
-public final class ExoneraTor {
-
- public static void main(final String[] args) throws Exception {
-
- // check parameters
- if (args.length < 4 || args.length > 5) {
- System.err.println("\nUsage: java "
- + ExoneraTor.class.getSimpleName()
- + " <descriptor archive directory> <IP address in question> "
- + "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> "
- + "[<target address>[:<target port>]]\n");
- return;
- }
- File archiveDirectory = new File(args[0]);
- if (!archiveDirectory.exists() || !archiveDirectory.isDirectory()) {
- System.err.println("\nDescriptor archive directory + "
- + archiveDirectory.getAbsolutePath()
- + " does not exist or is not a directory.\n");
- return;
- }
- String relayIP = args[1];
- String timestampStr = args[2] + " " + args[3];
- SimpleDateFormat timeFormat = new SimpleDateFormat(
- "yyyy-MM-dd HH:mm:ss");
- timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- long timestamp = timeFormat.parse(timestampStr).getTime();
- String target = null, targetIP = null, targetPort = null;
- String[] targetIPParts = null;
- if (args.length > 4) {
- target = args[4];
- if (target.contains(":")) {
- targetIP = target.split(":")[0];
- targetPort = target.split(":")[1];
- } else {
- targetIP = target;
- }
- targetIPParts = targetIP.replace(".", " ").split(" ");
- }
- String DELIMITER = "--------------------------------------------------"
- + "-------------------------";
- System.out.println("\nTrying to find out whether " + relayIP + " was "
- + "running as a Tor relay at " + timestampStr
- + (target != null ? " permitting exiting to " + target : "")
- + "...\n\n" + DELIMITER);
-
- // check that we have the required archives
- long timestampTooOld = timestamp - 300 * 60 * 1000;
- long timestampFrom = timestamp - 180 * 60 * 1000;
- long timestampTooNew = timestamp + 120 * 60 * 1000;
- Calendar calTooOld = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Calendar calFrom = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- Calendar calTooNew = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
- calTooOld.setTimeInMillis(timestampTooOld);
- calFrom.setTimeInMillis(timestampFrom);
- calTooNew.setTimeInMillis(timestampTooNew);
- System.out.printf("%nChecking that relevant archives between "
- + "%tF %<tT and %tF %<tT are available...%n", calTooOld,
- calTooNew);
- SortedSet<String> requiredDirs = new TreeSet<String>();
- requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooOld));
- requiredDirs.add(String.format("consensuses-%tY-%<tm", calTooNew));
- if (target != null) {
- requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
- calTooOld));
- requiredDirs.add(String.format("server-descriptors-%tY-%<tm",
- calTooNew));
- }
- SortedSet<File> consensusDirs = new TreeSet<File>();
- SortedSet<File> descriptorsDirs = new TreeSet<File>();
- Stack<File> directoriesLeftToParse = new Stack<File>();
- directoriesLeftToParse.push(archiveDirectory);
- while (!directoriesLeftToParse.isEmpty()) {
- File directoryOrFile = directoriesLeftToParse.pop();
- if (directoryOrFile.getName().startsWith("consensuses-")) {
- if (requiredDirs.contains(directoryOrFile.getName())) {
- requiredDirs.remove(directoryOrFile.getName());
- consensusDirs.add(directoryOrFile);
- }
- } else if (directoryOrFile.getName().startsWith(
- "server-descriptors-")) {
- if (requiredDirs.contains(directoryOrFile.getName())) {
- requiredDirs.remove(directoryOrFile.getName());
- descriptorsDirs.add(directoryOrFile);
- }
- } else {
- for (File fileInDir : directoryOrFile.listFiles())
- if (fileInDir.isDirectory())
- directoriesLeftToParse.push(fileInDir);
- }
- }
- for (File dir : consensusDirs)
- System.out.println(" " + dir.getAbsolutePath());
- for (File dir : descriptorsDirs)
- System.out.println(" " + dir.getAbsolutePath());
- if (!requiredDirs.isEmpty()) {
- System.out.println("\nWe are missing consensuses and/or server "
- + "descriptors. Please download these archives and extract them "
- + "to your data directory. Be sure NOT to rename the extracted "
- + "directories or the contained files.");
- for (String dir : requiredDirs)
- System.out.println(" " + dir + ".tar.bz2");
- return;
- }
-
- // look for consensus files
- System.out.printf("%nLooking for relevant consensuses between "
- + "%tF %<tT and %s...%n", calFrom, timestampStr);
- SortedSet<File> tooOldConsensuses = new TreeSet<File>();
- SortedSet<File> relevantConsensuses = new TreeSet<File>();
- SortedSet<File> tooNewConsensuses = new TreeSet<File>();
- directoriesLeftToParse.clear();
- for (File consensusDir : consensusDirs)
- directoriesLeftToParse.push(consensusDir);
- SimpleDateFormat consensusTimeFormat = new SimpleDateFormat(
- "yyyy-MM-dd-HH-mm-ss");
- consensusTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
- while (!directoriesLeftToParse.isEmpty()) {
- File directoryOrFile = directoriesLeftToParse.pop();
- if (directoryOrFile.isDirectory()) {
- for (File fileInDir : directoryOrFile.listFiles()) {
- directoriesLeftToParse.push(fileInDir);
- }
- continue;
- } else {
- String filename = directoryOrFile.getName();
- if (filename.endsWith("consensus")) {
- long consensusTime = consensusTimeFormat.parse(
- filename.substring(0, 19)).getTime();
- if (consensusTime >= timestampTooOld &&
- consensusTime < timestampFrom)
- tooOldConsensuses.add(directoryOrFile);
- else if (consensusTime >= timestampFrom &&
- consensusTime <= timestamp)
- relevantConsensuses.add(directoryOrFile);
- else if (consensusTime > timestamp &&
- consensusTime <= timestampTooNew)
- tooNewConsensuses.add(directoryOrFile);
- }
- }
- }
- SortedSet<File> allConsensuses = new TreeSet<File>();
- allConsensuses.addAll(tooOldConsensuses);
- allConsensuses.addAll(relevantConsensuses);
- allConsensuses.addAll(tooNewConsensuses);
- if (allConsensuses.isEmpty()) {
- System.out.println(" None found!\n\n" + DELIMITER + "\n\nResult is "
- + "INDECISIVE!\n\nWe cannot make any statement about IP address "
- + relayIP + " being a relay at " + timestampStr + " or not! We "
- + "did not find any relevant consensuses preceding the given "
- + "time. This either means that you did not download and "
- + "extract the consensus archives preceding the hours before "
- + "the given time, or (in rare cases) that the directory "
- + "archives are missing the hours before the timestamp. Please "
- + "check that your directory archives contain consensus files "
- + "of the interval 5:00 hours before and 2:00 hours after the "
- + "time you are looking for.\n");
- return;
- }
- for (File f : relevantConsensuses)
- System.out.println(" " + f.getAbsolutePath());
-
- // parse consensuses to find descriptors belonging to the IP address
- System.out.println("\nLooking for descriptor identifiers referenced "
- + "in \"r \" lines in these consensuses containing IP address "
- + relayIP + "...");
- SortedSet<File> positiveConsensusesNoTarget = new TreeSet<File>();
- Set<String> addressesInSameNetwork = new HashSet<String>();
- SortedMap<String, Set<File>> relevantDescriptors =
- new TreeMap<String, Set<File>>();
- for (File consensus : allConsensuses) {
- if (relevantConsensuses.contains(consensus))
- System.out.println(" " + consensus.getAbsolutePath());
- BufferedReader br = new BufferedReader(new FileReader(consensus));
- String line;
- while ((line = br.readLine()) != null) {
- if (!line.startsWith("r "))
- continue;
- String[] parts = line.split(" ");
- String address = parts[6];
- if (address.equals(relayIP)) {
- byte[] result = Base64.decode(parts[3] + "==");
- String hex = String.format("%040x", new BigInteger(1,
- Base64.decode(parts[3] + "==")));
- if (!relevantDescriptors.containsKey(hex))
- relevantDescriptors.put(hex, new HashSet<File>());
- relevantDescriptors.get(hex).add(consensus);
- positiveConsensusesNoTarget.add(consensus);
- if (relevantConsensuses.contains(consensus))
- System.out.println(" \"" + line + "\" references "
- + "descriptor " + hex);
- } else {
- if (relayIP.startsWith(address.substring(0,
- address.lastIndexOf(".")))) {
- addressesInSameNetwork.add(address);
- }
- }
- }
- br.close();
- }
- if (relevantDescriptors.isEmpty()) {
- System.out.printf(" None found!\n\n" + DELIMITER + "\n\nResult is "
- + "NEGATIVE with moderate certainty!\n\nWe did not find IP "
- + "address " + relayIP + " in any of the consensuses that were "
- + "published between %tF %<tT and %tF %<tT.\n\nA possible "
- + "reason for false negatives is that the relay is using a "
- + "different IP address when generating a descriptor than for "
- + "exiting to the Internet. We hope to provide better checks "
- + "for this case in the future.", calTooOld, calTooNew);
- if (!addressesInSameNetwork.isEmpty()) {
- System.out.println("\n\nThe following other IP addresses of Tor "
- + "relays were found in the mentioned consensus files that "
- + "are in the same /24 network and that could be related to "
- + "IP address " + relayIP + ":");
- for (String s : addressesInSameNetwork) {
- System.out.println(" " + s);
- }
- }
- System.out.println();
- return;
- }
-
- // parse router descriptors to check exit policies
- SortedSet<File> positiveConsensuses = new TreeSet<File>();
- Set<String> missingDescriptors = new HashSet<String>();
- if (target != null) {
- System.out.println("\nChecking if referenced descriptors permit "
- + "exiting to " + target + "...");
- Set<String> descriptors = relevantDescriptors.keySet();
- missingDescriptors.addAll(relevantDescriptors.keySet());
- directoriesLeftToParse.clear();
- for (File descriptorsDir : descriptorsDirs)
- directoriesLeftToParse.push(descriptorsDir);
- while (!directoriesLeftToParse.isEmpty()) {
- File directoryOrFile = directoriesLeftToParse.pop();
- if (directoryOrFile.isDirectory()) {
- for (File fileInDir : directoryOrFile.listFiles()) {
- directoriesLeftToParse.push(fileInDir);
- }
- continue;
- } else {
- String filename = directoryOrFile.getName();
- for (String descriptor : descriptors) {
- if (filename.equals(descriptor)) {
- missingDescriptors.remove(descriptor);
- BufferedReader br = new BufferedReader(
- new FileReader(directoryOrFile));
- String line;
- while ((line = br.readLine()) != null) {
- if (line.startsWith("reject ") ||
- line.startsWith("accept ")) {
- boolean ruleAccept = line.split(" ")[0].equals("accept");
- String ruleAddress = line.split(" ")[1].split(":")[0];
- if (!ruleAddress.equals("*")) {
- if (!ruleAddress.contains("/") &&
- !ruleAddress.equals(targetIP))
- continue; // IP address does not match
- String[] ruleIPParts = ruleAddress.split("/")[0].
- replace(".", " ").split(" ");
- int ruleNetwork = ruleAddress.contains("/") ?
- Integer.parseInt(ruleAddress.split("/")[1]) : 32;
- for (int i = 0; i < 4; i++) {
- if (ruleNetwork == 0) {
- break;
- } else if (ruleNetwork >= 8) {
- if (ruleIPParts[i].equals(targetIPParts[i]))
- ruleNetwork -= 8;
- else
- break;
- } else {
- int mask = 255 ^ 255 >>> ruleNetwork;
- if ((Integer.parseInt(ruleIPParts[i]) & mask) ==
- (Integer.parseInt(targetIPParts[i]) & mask))
- ruleNetwork = 0;
- break;
- }
- }
- if (ruleNetwork > 0)
- continue; // IP address does not match
- }
- String rulePort = line.split(" ")[1].split(":")[1];
- if (targetPort == null && !ruleAccept &&
- !rulePort.equals("*"))
- continue; // with no port given, we only consider
- // reject :* rules as matching
- if (targetPort != null) {
- if (!rulePort.equals("*") &&
- !targetPort.equals(rulePort))
- continue; // ports do not match
- }
- boolean relevantMatch = false;
- for (File f : relevantDescriptors.get(descriptor))
- if (relevantConsensuses.contains(f))
- relevantMatch = true;
- if (relevantMatch)
- System.out.println(" "
- + directoryOrFile.getAbsolutePath() + " "
- + (ruleAccept ? "permits" : "does not permit")
- + " exiting to " + target + " according to rule \""
- + line + "\"");
- if (ruleAccept)
- positiveConsensuses.addAll(
- relevantDescriptors.get(descriptor));
- break;
- }
- }
- br.close();
- }
- }
- }
- }
- }
-
- // print out result
- Set<File> matches = (target != null) ? positiveConsensuses
- : positiveConsensusesNoTarget;
- if (matches.contains(relevantConsensuses.last())) {
- System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE with "
- + "high certainty!\n\nWe found one or more relays on IP address "
- + relayIP
- + (target != null ? " permitting exit to " + target : "")
- + " in the most recent consensus preceding " + timestampStr
- + " that clients were likely to know.\n");
- return;
- }
- boolean resultIndecisive = target != null
- && !missingDescriptors.isEmpty();
- if (resultIndecisive) {
- System.out.println("\n" + DELIMITER + "\n\nResult is INDECISIVE!\n\n"
- + "At least one referenced descriptor could not be found. This "
- + "is a rare case, but one that (apparently) happens. We cannot "
- + "make any good statement about exit relays without these "
- + "descriptors. The following descriptors are missing:");
- for (String desc : missingDescriptors)
- System.out.println(" " + desc);
- }
- boolean inOtherRelevantConsensus = false, inTooOldConsensuses = false,
- inTooNewConsensuses = false;
- for (File f : matches)
- if (relevantConsensuses.contains(f))
- inOtherRelevantConsensus = true;
- else if (tooOldConsensuses.contains(f))
- inTooOldConsensuses = true;
- else if (tooNewConsensuses.contains(f))
- inTooNewConsensuses = true;
- if (inOtherRelevantConsensus) {
- if (!resultIndecisive)
- System.out.println("\n" + DELIMITER + "\n\nResult is POSITIVE "
- + "with moderate certainty!");
- System.out.println("\nWe found one or more relays on IP address "
- + relayIP
- + (target != null ? " permitting exit to " + target : "")
- + ", but not in the consensus immediately preceding "
- + timestampStr + ". A possible reason for the relay being "
- + "missing in the last consensus preceding the given time might "
- + "be that some of the directory authorities had difficulties "
- + "connecting to the relay. However, clients might still have "
- + "used the relay.");
- } else {
- if (!resultIndecisive)
- System.out.println("\n" + DELIMITER + "\n\nResult is NEGATIVE "
- + "with high certainty!");
- System.out.println("\nWe did not find any relay on IP address "
- + relayIP
- + (target != null ? " permitting exit to " + target : "")
- + " in the consensuses 3:00 hours preceding " + timestampStr
- + ".");
- if (inTooOldConsensuses || inTooNewConsensuses) {
- if (inTooOldConsensuses && !inTooNewConsensuses)
- System.out.println("\nNote that we found a matching relay in "
- + "consensuses that were published between 5:00 and 3:00 "
- + "hours before " + timestampStr + ".");
- else if (!inTooOldConsensuses && inTooNewConsensuses)
- System.out.println("\nNote that we found a matching relay in "
- + "consensuses that were published up to 2:00 hours after "
- + timestampStr + ".");
- else
- System.out.println("\nNote that we found a matching relay in "
- + "consensuses that were published between 5:00 and 3:00 "
- + "hours before and in consensuses that were published up "
- + "to 2:00 hours after " + timestampStr + ".");
- System.out.println("Make sure that the timestamp you provided is "
- + "in the correct timezone: UTC (or GMT).");
- }
- }
- if (target != null) {
- if (positiveConsensuses.isEmpty() &&
- !positiveConsensusesNoTarget.isEmpty())
- System.out.println("\nNote that although the found relay(s) did "
- + "not permit exiting to " + target + ", there have been one "
- + "or more relays running at the given time.");
- }
- System.out.println();
- }
-}
-
Deleted: projects/archives/trunk/exonerator/HOWTO
===================================================================
--- projects/archives/trunk/exonerator/HOWTO 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/HOWTO 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,165 +0,0 @@
-ExoneraTor
- or: a script that tells you whether some IP address was a Tor relay
-
----------------------------------------------------------------------------
-
- THIS REPOSITORY HAS MOVED TO GIT!
-
- git clone git://git.torproject.org/metrics-utils/
-
----------------------------------------------------------------------------
-
-Introduction:
-
-Some people have expressed the desire to learn whether a given IP address
-has been a Tor relay at a certain time. In addition to that, these people
-might want to know whether the IP address permitted exit to a given address
-and port.
-
-Answering these questions can be important for Tor relay operators who
-need to show the authorities that any abuse originating from their IP
-address was likely conducted by an anonymous Tor user rather than by the
-operator. Likewise, police investigators might be interested in these
-answers in order to decide whether to proceed with an investigation.
-
-We can answer the above questions by looking at the descriptor archives
-that have been available since late 2007 (earlier data exists, but this
-script only works with the data format produced starting in October 2007).
-This script parses the directory archives to determine whether a certain
-IP address was a Tor relay at a given time. The script also prints out all
-intermediate steps in answering this question, so that users can confirm
-the correctness of the result themselves.
-
-This script is available in two versions, written in Python and in Java,
-with equivalent functionality.
-
----------------------------------------------------------------------------
-
-Python Quick Start:
-
-In order to run the Python version of this script, you need to install and
-download the following software and data (please note that all instructions
-are written for Linux; commands for Windows or Mac OS X may vary):
-
-- Install Python 2.6.2 or higher. (Previous Python versions might work,
- too, but have not been tested.)
-
-- Install the Python module IPy 0.62 or higher either from
- http://pypi.python.org/pypi/IPy/ or using "apt-get install python-ipy" on
- Debian-based systems.
-
-- Download the v3 consensuses and server descriptors of the relevant time
- from http://metrics.torproject.org/data.html and extract them to a
- directory in your working directory, e.g. /home/you/exonerator/data/ .
- Don't rename the extracted directories or any of the contained files, or
- the script won't find the contained descriptors.
-
- Note that you only need the server descriptors if you want to learn
- whether a given IP address permits exiting to a given target. If you
- only want to learn whether that IP address was a Tor relay, you don't
- need them.
-
-- Run the script, providing it with the parameters it needs:
-
- python exonerator.py [--archive=<descriptor archive directory>]
- <IP address in question>
- <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
- [<target address>[:<target port>]]
-
- The --archive option defaults to data/ . In the following examples, it is
- assumed that this default applies.
-
- Make sure that the timestamp is provided in UTC, which is equivalent to
- GMT, and not in your local timezone! Otherwise, results will very likely
- be wrong.
-
- A sample invocation might be:
-
- $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
- 209.85.129.104:80
-
----------------------------------------------------------------------------
-
-Java Quick Start:
-
-In order to run the Java version of this script, you need to install and
-download the following software and data (please note that all instructions
-are written for Linux; commands for Windows or Mac OS X may vary):
-
-- Install Java 6 or higher.
-
-- Download the BouncyCastle provider that includes Base 64 decoding from
- http://www.bouncycastle.org/download/bcprov-jdk16-143.jar and put it in
- your working directory, e.g. /home/you/exonerator/ .
-
-- Download the v3 consensuses and server descriptors of the relevant time
- from http://metrics.torproject.org/data.html and extract them to a
- directory in your working directory, e.g. /home/you/exonerator/data/ .
- Don't rename the extracted directories or any of the contained files, or
- the script won't find the contained descriptors.
-
- Note that you only need the server descriptors if you want to learn
- whether a given IP address permits exiting to a given target. If you
- only want to learn whether that IP address was a Tor relay, you don't
- need them.
-
-- Compile the (single) Java class using this command:
-
- $ javac -cp bcprov-jdk16-143.jar ExoneraTor.java
-
-- Run the script, providing it with the parameters it needs:
-
- java -cp .:bcprov-jdk16-143.jar ExoneraTor
- <descriptor archive directory>
- <IP address in question>
- <timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss>
- [<target address>[:<target port>]]
-
- Make sure that the timestamp is provided in UTC, which is equivalent to
- GMT, and not in your local timezone! Otherwise, results will very likely
- be wrong.
-
- A sample invocation might be:
-
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
- 2009-08-15 16:05:00 209.85.129.104:80
-
----------------------------------------------------------------------------
-
-Test cases:
-
-The following test cases work with the August 2009 archives and can be used
-to check whether this script works correctly:
-
-- Positive result of echelon1+2 being a relay:
-
- $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
- 2009-08-15 16:05:00
-
-- Positive result of echelon1+2 exiting to google.com on any port
-
- $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 209.85.129.104
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
- 2009-08-15 16:05:00 209.85.129.104
-
-- Positive result of echelon1+2 exiting to google.com on port 80
-
- $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
- 209.85.129.104:80
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
- 2009-08-15 16:05:00 209.85.129.104:80
-
-- Negative result of echelon1+2 exiting to google.com, but not on port 25
-
- $ python exonerator.py 209.17.171.104 2009-08-15 16:05:00 \
- 209.85.129.104:25
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.104 \
- 2009-08-15 16:05:00 209.85.129.104:25
-
-- Negative result with IP address of echelon1+2 changed in the last octet
-
- $ python exonerator.py 209.17.171.50 2009-08-15 16:05:00
- $ java -cp .:bcprov-jdk16-143.jar ExoneraTor data/ 209.17.171.50 \
- 2009-08-15 16:05:00
-
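
To illustrate the core check described in the HOWTO above, here is a stand-alone sketch that is not part of the repository; the class name and the consensus path are made-up example values, and the IP address is the one from the test cases. It scans the "r " lines of a single network status consensus for a given IP address, which is the first step both scripts take before mapping descriptors and checking exit policies:

  import java.io.BufferedReader;
  import java.io.FileReader;

  public class ConsensusScanExample {
    public static void main(String[] args) throws Exception {
      String relayIP = "209.17.171.104";
      /* Example path inside an extracted consensus archive; the real
       * scripts walk the whole directory tree instead. */
      BufferedReader br = new BufferedReader(new FileReader(
          "data/consensuses-2009-08/15/2009-08-15-16-00-00-consensus"));
      String line;
      while ((line = br.readLine()) != null) {
        /* The seventh field of an "r " line is the relay's IP address. */
        if (line.startsWith("r ") && line.split(" ")[6].equals(relayIP)) {
          System.out.println("Found: " + line);
        }
      }
      br.close();
    }
  }
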
Deleted: projects/archives/trunk/exonerator/LICENSE
===================================================================
--- projects/archives/trunk/exonerator/LICENSE 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/LICENSE 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,30 +0,0 @@
-Copyright 2009 The Tor Project
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- * Neither the names of the copyright owners nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
Deleted: projects/archives/trunk/exonerator/exonerator.py
===================================================================
--- projects/archives/trunk/exonerator/exonerator.py 2011-05-17 22:47:52 UTC (rev 24767)
+++ projects/archives/trunk/exonerator/exonerator.py 2011-05-18 20:15:43 UTC (rev 24768)
@@ -1,370 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2009 The Tor Project -- see LICENSE for licensing information
-
-import binascii
-import os
-import sys
-import time
-import calendar
-from optparse import OptionParser
-from IPy import IP
-
-USAGE = "usage: %prog [options] <IP address in question> " \
- "<timestamp, in UTC, formatted as YYYY-MM-DD hh:mm:ss> " \
- "[<target address>[:<target port>]]"
-DELIMITER = "-" * 75
-
-if __name__ == '__main__':
- # check parameters
- parser = OptionParser(usage=USAGE)
- parser.add_option("-a", "--archive", dest="archive", default="data/",
- help="descriptor archive directory")
- (options, args) = parser.parse_args()
- if len(args) not in (3, 4):
- parser.error("incorrect number of arguments")
- if not os.path.isdir(options.archive):
- parser.error("descriptor archive directory %s does not exist or " \
- "is not a directory." % \
- os.path.abspath(options.archive))
- archiveDirectory = os.path.dirname(options.archive)
- try:
- relayIP = IP(args[0])
- except ValueError:
- parser.error("invalid IP address in question: '%s'" % args[0])
- timestampStr = "%s %s" % (args[1], args[2])
- try:
- timestamp = time.strptime(timestampStr + " UTC", "%Y-%m-%d %H:%M:%S %Z")
- except ValueError:
- parser.error("incorrect time format: '%s'" % timestampStr)
- # if a target is given, parse address and possibly port part of it
- target = None
- targetIP = None
- targetPort = None
- if len(args) == 4:
- target = args[3]
- targetParts = target.split(":")
- try:
- targetIP = IP(targetParts[0])
- except ValueError:
- parser.error("invalid target IP address in: '%s'" % args[3])
- if len(targetParts) > 2:
- parser.error("invalid target format: '%s'" % args[3])
- if len(targetParts) > 1:
- try:
- targetPortTest = int(targetParts[1])
- except ValueError:
- parser.error("invalid target port number in: '%s'" % \
- args[3])
- if targetPortTest not in range(1, 65535):
- parser.error("invalid target port number in: '%s'" % \
- args[3])
- targetPort = targetParts[1]
-
- targetHelpStr = ""
- if target:
- targetHelpStr = " permitting exiting to %s" % target
- print "\nTrying to find out whether %s was running a Tor relay at " \
- "%s%s...\n\n%s\n" % (relayIP, timestampStr, targetHelpStr,
- DELIMITER)
-
- # check that we have the required archives
- timestampTooOld = time.gmtime(calendar.timegm(timestamp) - 300 * 60)
- timestampFrom = time.gmtime(calendar.timegm(timestamp) - 180 * 60)
- timestampTooNew = time.gmtime(calendar.timegm(timestamp) + 120 * 60)
- timestampTooOldStr = time.strftime("%Y-%m-%d %H:%M:%S",
- timestampTooOld)
- timestampFromStr = time.strftime("%Y-%m-%d %H:%M:%S", timestampFrom)
- timestampTooNewStr = time.strftime("%Y-%m-%d %H:%M:%S",
- timestampTooNew)
- print "\nChecking that relevant archives between %s and %s are " \
- "available..." % (timestampTooOldStr, timestampTooNewStr)
-
- requiredDirs = set()
- requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooOld))
- requiredDirs.add(time.strftime("consensuses-%Y-%m", timestampTooNew))
- if target:
- requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
- timestampTooOld))
- requiredDirs.add(time.strftime("server-descriptors-%Y-%m",
- timestampTooNew))
-
- consensusDirs = list()
- descriptorsDirs = list()
- directoriesLeftToParse = list()
- directoriesLeftToParse.append(archiveDirectory)
-
- while directoriesLeftToParse:
- directoryOrFile = directoriesLeftToParse.pop()
- basename = os.path.basename(directoryOrFile)
- if basename.startswith("consensuses-"):
- if basename in requiredDirs:
- requiredDirs.remove(basename)
- consensusDirs.append(directoryOrFile)
- elif basename.startswith("server-descriptors-"):
- if basename in requiredDirs:
- requiredDirs.remove(basename)
- descriptorsDirs.append(directoryOrFile)
- else:
- for filename in os.listdir(directoryOrFile):
- entry = "%s/%s" % (directoryOrFile, filename)
- if os.path.isdir(entry):
- directoriesLeftToParse.append(entry)
-
- consensusDirs.sort()
- for consensusDir in consensusDirs:
- print " %s" % consensusDir
- descriptorsDirs.sort()
- for descriptorsDir in descriptorsDirs:
- print " %s" % descriptorsDir
-
- if requiredDirs:
- print "\nWe are missing consensuses and/or server descriptors. " \
- "Please download these archives and extract them to your " \
- "data directory. Be sure NOT to rename the extracted " \
- "directories or the contained files."
- for requiredDir in sorted(requiredDirs):
- print " %s.tar.bz2" % requiredDir
- sys.exit()
-
- # look for consensus files
- print "\nLooking for relevant consensuses between %s and %s..." % \
- (timestampFromStr, timestampStr)
- tooOldConsensuses = set()
- relevantConsensuses = set()
- tooNewConsensuses = set()
- directoriesLeftToParse = list(consensusDirs)
- while directoriesLeftToParse:
- directoryOrFile = directoriesLeftToParse.pop()
- if os.path.isdir(directoryOrFile):
- for filename in os.listdir(directoryOrFile):
- entry = "%s/%s" % (directoryOrFile, filename)
- directoriesLeftToParse.append(entry)
- else:
- basename = os.path.basename(directoryOrFile)
- if basename.endswith("consensus"):
- consensusTime = time.strptime(basename[0:19],
- "%Y-%m-%d-%H-%M-%S")
- if consensusTime >= timestampTooOld and \
- consensusTime < timestampFrom:
- tooOldConsensuses.add(directoryOrFile)
- elif consensusTime >= timestampFrom and \
- consensusTime <= timestamp:
- relevantConsensuses.add(directoryOrFile)
- elif consensusTime > timestamp and \
- consensusTime <= timestampTooNew:
- tooNewConsensuses.add(directoryOrFile)
- allConsensuses = set()
- allConsensuses.update(tooOldConsensuses)
- allConsensuses.update(relevantConsensuses)
- allConsensuses.update(tooNewConsensuses)
- if not allConsensuses:
- print " None found!\n\n%s\n\nResult is INDECISIVE!\n\nWe " \
- "cannot make any statement about IP address %s being a " \
- "relay at %s or not! We did not find any relevant " \
- "consensuses preceding the given time. This either means " \
- "that you did not download and extract the consensus " \
- "archives preceding the hours before the given time, or " \
- "(in rare cases) that the directory archives are missing " \
- "the hours before the timestamp. Please check that your " \
- "directory archives contain consensus files of the " \
- "interval 5:00 hours before and 2:00 hours after the time " \
- "you are looking for.\n" % (DELIMITER, relayIP, timestampStr)
- sys.exit()
- for consensus in sorted(relevantConsensuses):
- print " %s" % consensus
-
- # parse consensuses to find descriptors belonging to the IP address
- print "\nLooking for descriptor identifiers referenced in \"r \" " \
- "lines in these consensuses containing IP address %s..." % \
- relayIP
- positiveConsensusesNoTarget = set()
- addressesInSameNetwork = set()
- relevantDescriptors = dict()
- for consensus in allConsensuses:
- if consensus in relevantConsensuses:
- print " %s" % consensus
- consensusFile = open(consensus, "r")
- line = consensusFile.readline()
- while line:
- if line.startswith("r "):
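- # "r" lines have the form "r <nickname> <identity> <digest> <date>
- # <time> <IP> <ORPort> <DirPort>"; field 6 is the relay's IP address
- # and field 3 its server descriptor digest, base64-encoded without
- # trailing "==" padding (which is why it is re-added below)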
- address = IP(line.split(" ")[6])
- if address == relayIP:
- hexDesc = binascii.b2a_hex(binascii.a2b_base64(
- line.split(" ")[3] + "=="))
- if hexDesc not in relevantDescriptors:
- relevantDescriptors[hexDesc] = set()
- relevantDescriptors[hexDesc].add(consensus)
- positiveConsensusesNoTarget.add(consensus)
- if consensus in relevantConsensuses:
- print " \"%s\" references descriptor %s" % \
- (line.rstrip(), hexDesc)
- elif relayIP.overlaps(IP("%s/24" % address,
- make_net=True)):
- addressesInSameNetwork.add(address)
- line = consensusFile.readline()
- consensusFile.close()
- if not relevantDescriptors:
- print " None found!\n\n%s\n\nResult is NEGATIVE with moderate " \
- "certainty!\n\nWe did not find IP address %s in any of " \
- "the consensuses that were published between %s and " \
- "%s.\n\nA possible reason for false negatives is that the " \
- "relay is using a different IP address when generating a " \
- "descriptor than for exiting to the Internet. We hope to " \
- "provide better checks for this case in the future." % \
- (DELIMITER, relayIP, timestampTooOldStr, timestampTooNewStr)
- if addressesInSameNetwork:
- print "\nThe following other IP addresses of Tor relays " \
- "were found in the mentioned consensus files that are " \
- "in the same /24 network and that could be related to " \
- "IP address %s:" % relayIP
- for addr in addressesInSameNetwork:
- print " %s" % addr
- print ""
- sys.exit()
-
- # parse router descriptors to check exit policies
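- # exit policies are evaluated first-match: the first accept/reject rule
- # matching the target address and port decides, so we stop scanning a
- # descriptor at its first matching rule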
- positiveConsensuses = set()
- missingDescriptors = set()
- if target:
- print "\nChecking if referenced descriptors permit exiting to " \
- "%s..." % target
- descriptors = relevantDescriptors.keys()
- missingDescriptors.update(descriptors)
- directoriesLeftToParse = list(descriptorsDirs)
- while directoriesLeftToParse:
- directoryOrFile = directoriesLeftToParse.pop()
- if os.path.isdir(directoryOrFile):
- for filename in os.listdir(directoryOrFile):
- entry = "%s/%s" % (directoryOrFile, filename)
- directoriesLeftToParse.append(entry)
- else:
- basename = os.path.basename(directoryOrFile)
- for descriptor in descriptors:
- if basename == descriptor:
- missingDescriptors.remove(descriptor)
- descriptorFile = open(directoryOrFile, "r")
- line = descriptorFile.readline()
- while line:
- if line.startswith("reject ") or \
- line.startswith("accept "):
- ruleAccept = line.split()[0] == "accept"
- ruleAddress = line.split()[1].split(":")[0]
- if ruleAddress != "*" and not \
- IP(ruleAddress).overlaps(targetIP):
- # IP address does not match
- line = descriptorFile.readline()
- continue
- rulePort = line.split()[1].split(":")[1]
- if not targetPort and not ruleAccept and \
- rulePort != "*":
- # with no port given, we only consider
- # reject :* rules as matching
- line = descriptorFile.readline()
- continue
- if targetPort and rulePort != "*" and \
- targetPort != rulePort:
- # ports do not match
- line = descriptorFile.readline()
- continue
- relevantMatch = False
- for f in relevantDescriptors.get(
- descriptor):
- if f in relevantConsensuses:
- relevantMatch = True
- if relevantMatch:
- if ruleAccept:
- print " %s permits exiting to " \
- "%s according to rule " \
- "\"%s\"" % (directoryOrFile,
- target, line.rstrip())
- else:
- print " %s does not permit " \
- "exiting to %s according " \
- "to rule \"%s\"" % \
- (directoryOrFile,
- target, line.rstrip())
- if ruleAccept:
- for consensus in \
- relevantDescriptors.get(
- descriptor):
- positiveConsensuses.add(consensus)
- break
- line = descriptorFile.readline()
- descriptorFile.close()
-
- # print out result
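- # a match in the last relevant consensus yields a POSITIVE result with
- # high certainty; a match only in earlier relevant consensuses yields
- # POSITIVE with moderate certainty; no match yields a NEGATIVE result;
- # missing referenced descriptors make the result INDECISIVE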
- matches = None
- if target:
- matches = positiveConsensuses
- else:
- matches = positiveConsensusesNoTarget
- lastConsensus = sorted(relevantConsensuses)[-1]
- if lastConsensus in matches:
- print "\n%s\n\nResult is POSITIVE with high certainty!\n\nWe " \
- "found one or more relays on IP address %s%s in the most " \
- "recent consensus preceding %s that clients were likely " \
- "to know.\n" % (DELIMITER, relayIP, targetHelpStr,
- timestampStr)
- sys.exit()
- resultIndecisive = target and len(missingDescriptors) > 0
- if resultIndecisive:
- print "\n%s\n\nResult is INDECISIVE!\n\nAt least one " \
- "referenced descriptor could not be found. This is a rare " \
- "case, but one that (apparently) happens. We cannot make " \
- "any good statement about exit relays without these " \
- "descriptors. The following descriptors are missing:" % \
- DELIMITER
- for desc in missingDescriptors:
- print " %s" % desc
- inOtherRelevantConsensus = False
- inTooOldConsensuses = False
- inTooNewConsensuses = False
- for f in matches:
- if f in relevantConsensuses:
- inOtherRelevantConsensus = True
- elif f in tooOldConsensuses:
- inTooOldConsensuses = True
- elif f in tooNewConsensuses:
- inTooNewConsensuses = True
- if inOtherRelevantConsensus:
- if not resultIndecisive:
- print "\n%s\n\nResult is POSITIVE with moderate certainty!" % \
- DELIMITER
- print "\nWe found one or more relays on IP address %s%s, but " \
- "not in the consensus immediately preceding %s. A " \
- "possible reason for the relay being missing in the last " \
- "consensus preceding the given time might be that some of " \
- "the directory authorities had difficulties connecting to " \
- "the relay. However, clients might still have used the " \
- "relay." % (relayIP, targetHelpStr, timestampStr)
- else:
- if not resultIndecisive:
- print "\n%s\n\nResult is NEGATIVE with high certainty!" % \
- DELIMITER
- print "\nWe did not find any relay on IP address %s%s in the " \
- "consensuses 3:00 hours preceding %s." % (relayIP,
- targetHelpStr, timestampStr)
- if inTooOldConsensuses or inTooNewConsensuses:
- if inTooOldConsensuses and not inTooNewConsensuses:
- print "\nNote that we found a matching relay in " \
- "consensuses that were published between 5:00 and " \
- "3:00 hours before %s." % timestampStr
- elif not inTooOldConsensuses and inTooNewConsensuses:
- print "\nNote that we found a matching relay in " \
- "consensuses that were published up to 2:00 hours " \
- "after %s." % timestampStr
- else:
- print "\nNote that we found a matching relay in " \
- "consensuses that were published between 5:00 and " \
- "3:00 hours before and in consensuses that were " \
- "published up to 2:00 hours after %s." % timestampStr
- print "Make sure that the timestamp you provided is in the " \
- "correct timezone: UTC (or GMT)."
- if target:
- if not positiveConsensuses and positiveConsensusesNoTarget:
- print "\nNote that although the found relay(s) did not " \
- "permit exiting to %s there have been one or more " \
- "relays running at the given time." % target
- print ""
-