[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r20938: {} Add script to remove all sensitive information from bridge d (in projects/archives/trunk: . bridge-desc-sanitizer)
Author: kloesing
Date: 2009-11-10 10:33:33 -0500 (Tue, 10 Nov 2009)
New Revision: 20938
Added:
projects/archives/trunk/bridge-desc-sanitizer/
projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
projects/archives/trunk/bridge-desc-sanitizer/HOWTO
projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
Log:
Add script to remove all sensitive information from bridge descriptors.
Added: projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java (rev 0)
+++ projects/archives/trunk/bridge-desc-sanitizer/ConvertBridgeDescs.java 2009-11-10 15:33:33 UTC (rev 20938)
@@ -0,0 +1,452 @@
+import java.io.*;
+import java.util.*;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.codec.binary.*;
+
+/**
+ * Removes sensitive information from archived bridge descriptors.
+ *
+ * Reads the extracted snapshot directories (named "from-tonga-...") below
+ * the input directory and writes sanitized extra-info descriptors, server
+ * descriptors, and network statuses for the requested month to the output
+ * directory. Descriptors that are not referenced from a network status of
+ * that month, and extra-info descriptors that are not referenced from a
+ * kept descriptor, are deleted again at the end.
+ */
+public class ConvertBridgeDescs {
+
+  /**
+   * Runs the sanitizer.
+   *
+   * @param args input directory, geoip.txt file, year (YYYY), month (MM),
+   *     and output directory, in that order
+   * @throws Exception if the year or month is not a number, or if reading
+   *     or writing a file fails
+   */
+  public static void main(String[] args) throws Exception {
+
+    long started = System.currentTimeMillis();
+
+    if (args.length < 5) {
+      System.err.println("Usage: java "
+          + ConvertBridgeDescs.class.getSimpleName()
+          + " <input directory> <geoip.txt file> <YYYY> <MM> "
+          + "<output directory>");
+      System.exit(1);
+    }
+    File inDir = new File(args[0]);
+    File geoipFile = new File(args[1]);
+    String year = args[2];
+    int yearInt = Integer.parseInt(year);
+    int monthInt = Integer.parseInt(args[3]);
+    File outDir = new File(args[4]);
+    if (!outDir.exists()) {
+      outDir.mkdir();
+    }
+
+    SortedSet<File> statuses = new TreeSet<File>();
+    Set<File> descriptors = new HashSet<File>();
+    Set<File> extrainfos = new HashSet<File>();
+
+    /* Read the GeoIP database. The number before the first comma of a
+     * line is used as key (first address of an interval); the rest of the
+     * line (last address of the interval, country code) is the value. */
+    System.out.print("Parsing geoip.txt file... ");
+    SortedMap<Long, String> geoipDatabase = new TreeMap<Long, String>();
+    BufferedReader r = new BufferedReader(new FileReader(geoipFile));
+    try {
+      String line0 = null;
+      while ((line0 = r.readLine()) != null) {
+        /* Skip comments and blank lines. */
+        if (line0.startsWith("#") || line0.trim().length() == 0) {
+          continue;
+        }
+        geoipDatabase.put(Long.parseLong(line0.split(",")[0]),
+            line0.substring(line0.indexOf(',') + 1));
+      }
+    } finally {
+      r.close();
+    }
+    System.out.println("Found " + geoipDatabase.size()
+        + " entries (expected 100,000 +- 10,000).");
+
+    /* Collect the input files of the requested month, plus the last days
+     * of the previous and the first days of the next month. */
+    System.out.println("Checking files in " + inDir.getAbsolutePath()
+        + "...");
+    Stack<File> directoriesLeftToParse = new Stack<File>();
+    directoriesLeftToParse.push(inDir);
+    /* Pad the month ourselves, so that "1" works as well as "01". */
+    String currentYearAndMonth = "from-tonga-" + year + "-"
+        + (monthInt < 10 ? "0" : "") + monthInt;
+    String previousYearAndMonth = "from-tonga-" + (monthInt == 1 ?
+        "" + (yearInt - 1) + "-12" :
+        year + "-" + (monthInt < 11 ? "0" : "") + (monthInt - 1));
+    String nextYearAndMonth = "from-tonga-" + (monthInt == 12 ?
+        "" + (yearInt + 1) + "-01" :
+        year + "-" + (monthInt < 9 ? "0" : "") + (monthInt + 1));
+    while (!directoriesLeftToParse.isEmpty()) {
+      File directoryOrFile = directoriesLeftToParse.pop();
+      String filename = directoryOrFile.getName();
+      if (directoryOrFile.isDirectory()) {
+        if (/* base directory, whatever its name is */
+            directoryOrFile.equals(inDir) || filename.equals("in") ||
+            /* current month */
+            filename.startsWith(currentYearAndMonth) ||
+            /* last days of previous month */
+            (filename.startsWith(previousYearAndMonth)
+            && Integer.parseInt(filename.substring(19, 21)) > 24) ||
+            /* first days of next month */
+            (filename.startsWith(nextYearAndMonth)
+            && Integer.parseInt(filename.substring(19, 21)) < 6)) {
+          /* listFiles() returns null if the directory cannot be read. */
+          File[] children = directoryOrFile.listFiles();
+          if (children != null) {
+            for (File fileInDir : children) {
+              directoriesLeftToParse.push(fileInDir);
+            }
+          }
+        }
+        continue;
+      }
+      if (filename.startsWith("cached-extrainfo")) {
+        extrainfos.add(directoryOrFile);
+      } else if (filename.equals("bridge-descriptors")) {
+        descriptors.add(directoryOrFile);
+      } else if (filename.equals("networkstatus-bridges")) {
+        statuses.add(directoryOrFile);
+      }
+    }
+
+    int days = ((extrainfos.size() / 2 + descriptors.size()
+        + statuses.size()) + 3 * 24) / (3 * 48);
+    /* The estimate is 0 for small inputs; never divide by it directly. */
+    int progressDivisor = Math.max(1, days);
+    System.out.println("Found " + extrainfos.size()
+        + " cached-extrainfo[.new] files, " + descriptors.size()
+        + " bridge-descriptors files, and " + statuses.size()
+        + " networkstatus-bridges files, covering approximately " + days
+        + " days.");
+
+    /* Step 1: sanitize extra-info descriptors and remember the mapping
+     * from original to sanitized descriptor digest. */
+    System.out.print("Parsing extra-info descriptors");
+    String[] hex = new String[] { "0", "1", "2", "3", "4", "5", "6", "7",
+        "8", "9", "a", "b", "c", "d", "e", "f" };
+    for (String x : hex) {
+      for (String y : hex) {
+        new File(outDir + File.separator + "extra-infos" + File.separator
+            + x + File.separator + y).mkdirs();
+      }
+    }
+    Set<File> writtenExtrainfos = new HashSet<File>();
+    Map<String, String> extrainfoMapping = new HashMap<String, String>();
+    int parsed = 0;
+    for (File file : extrainfos) {
+      if (parsed++ > extrainfos.size() / progressDivisor) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      try {
+        String line = null;
+        StringBuilder original = null, scrubbed = null;
+        boolean skipSignature = false;
+        while ((line = br.readLine()) != null) {
+          if (skipSignature && !line.equals("-----END SIGNATURE-----")) {
+            continue;
+          } else if (line.startsWith("extra-info ")) {
+            original = new StringBuilder(line + "\n");
+            /* Replace nickname and hash the fingerprint. */
+            scrubbed = new StringBuilder("extra-info Unnamed "
+                + DigestUtils.shaHex(Hex.decodeHex(
+                line.split(" ")[2].toCharArray())).toUpperCase() + "\n");
+          } else if (line.startsWith("published ")
+              || line.startsWith("write-history ")
+              || line.startsWith("read-history ")
+              || line.startsWith("geoip-start-time ")
+              || line.startsWith("geoip-client-origins ")) {
+            original.append(line + "\n");
+            scrubbed.append(line + "\n");
+          } else if (line.startsWith("router-signature")) {
+            /* The original digest covers everything up to and including
+             * this line; the signature itself is dropped. */
+            String originalDesc = original.toString() + line + "\n";
+            String originalHash = DigestUtils.shaHex(originalDesc);
+            String scrubbedDesc = scrubbed.toString();
+            String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+            if (extrainfoMapping.containsKey(originalHash) &&
+                !extrainfoMapping.get(originalHash).equals(scrubbedHash)) {
+              System.out.println("We already have an extra-info mapping "
+                  + "from " + originalHash + " to "
+                  + extrainfoMapping.get(originalHash) + ", but we now "
+                  + "want to add a mapping to " + scrubbedHash
+                  + ". Exiting");
+              System.exit(1);
+            }
+            extrainfoMapping.put(originalHash, scrubbedHash);
+            File out = new File(outDir + File.separator + "extra-infos"
+                + File.separator + scrubbedHash.charAt(0) + File.separator
+                + scrubbedHash.charAt(1) + File.separator + scrubbedHash);
+            if (!out.exists()) {
+              BufferedWriter bw = new BufferedWriter(new FileWriter(out));
+              try {
+                bw.write(scrubbedDesc);
+              } finally {
+                bw.close();
+              }
+              writtenExtrainfos.add(out);
+            }
+          } else if (line.equals("-----BEGIN SIGNATURE-----")) {
+            skipSignature = true;
+          } else if (line.equals("-----END SIGNATURE-----")) {
+            skipSignature = false;
+          } else {
+            System.out.println("Unrecognized line '" + line
+                + "'. Exiting");
+            System.exit(1);
+          }
+        }
+      } finally {
+        br.close();
+      }
+    }
+    System.out.println("\nWrote " + writtenExtrainfos.size()
+        + " extra-info descriptors.");
+
+    /* Step 2: sanitize server descriptors. Only descriptors whose
+     * extra-info descriptor was found in step 1 are written. */
+    System.out.print("Parsing server descriptors");
+    for (String x : hex) {
+      for (String y : hex) {
+        new File(outDir + File.separator + "descriptors" + File.separator
+            + x + File.separator + y).mkdirs();
+      }
+    }
+    Set<File> writtenDescriptors = new HashSet<File>();
+    Map<File, File> referencedExtraInfos = new HashMap<File, File>();
+    Map<String, String> descriptorMapping = new HashMap<String, String>();
+    int found = 0, notfound = 0;
+    parsed = 0;
+    String haveExtraInfo = null;
+    for (File file : descriptors) {
+      if (parsed++ > descriptors.size() / progressDivisor) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      try {
+        String line = null, country = null;
+        StringBuilder original = null, scrubbed = null;
+        boolean skipCrypto = false, contactWritten = false;
+        while ((line = br.readLine()) != null) {
+          if (skipCrypto && !line.startsWith("-----END ")) {
+            /* Crypto objects are part of the original digest, but are
+             * left out of the sanitized descriptor. */
+            original.append(line + "\n");
+            continue;
+          } else if (line.startsWith("router ")) {
+            original = new StringBuilder(line + "\n");
+            String[] routerParts = line.split(" ");
+            /* Resolve the address to a country code ("zz" if unknown)
+             * before it is replaced with 127.0.0.1. */
+            country = "zz";
+            String[] ipParts = routerParts[2].split("\\.");
+            long ipNum = Long.parseLong(ipParts[0]) * 256L * 256L * 256L
+                + Long.parseLong(ipParts[1]) * 256L * 256L
+                + Long.parseLong(ipParts[2]) * 256L
+                + Long.parseLong(ipParts[3]);
+            if (!geoipDatabase.isEmpty()
+                && ipNum >= geoipDatabase.firstKey()) {
+              /* headMap() excludes its bound, so ask for ipNum + 1 to
+               * also find an interval that starts exactly at ipNum. */
+              long intervalStart =
+                  geoipDatabase.headMap(ipNum + 1L).lastKey();
+              String[] dbContent =
+                  geoipDatabase.get(intervalStart).split(",");
+              long intervalEnd = Long.parseLong(dbContent[0]);
+              if (ipNum <= intervalEnd) {
+                country = dbContent[1].toLowerCase();
+              }
+            }
+            scrubbed = new StringBuilder("router Unnamed 127.0.0.1 "
+                + routerParts[3] + " " + routerParts[4] + " "
+                + routerParts[5] + "\n");
+            contactWritten = false;
+            haveExtraInfo = null;
+          } else if (line.startsWith("opt fingerprint ")) {
+            original.append(line + "\n");
+            scrubbed.append("opt fingerprint");
+            String fingerprint = DigestUtils.shaHex(Hex.decodeHex(
+                line.substring(16).replaceAll(" ", "").toCharArray())).
+                toUpperCase();
+            /* Write the hashed fingerprint in groups of four. */
+            for (int i = 0; i < fingerprint.length() / 4; i++) {
+              scrubbed.append(" "
+                  + fingerprint.substring(4 * i, 4 * (i + 1)));
+            }
+            scrubbed.append("\n");
+          } else if (line.startsWith("contact ")) {
+            original.append(line + "\n");
+            scrubbed.append("contact somebody at example dot " + country
+                + "\n");
+            contactWritten = true;
+          } else if (line.startsWith("router-signature")) {
+            String originalDesc = original.toString() + line + "\n";
+            String originalHash = DigestUtils.shaHex(originalDesc);
+            String scrubbedDesc = scrubbed.toString();
+            String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
+            if (descriptorMapping.containsKey(originalHash) &&
+                !descriptorMapping.get(originalHash).equals(
+                scrubbedHash)) {
+              System.out.println("We already have a descriptor mapping "
+                  + "from " + originalHash + " to "
+                  + descriptorMapping.get(originalHash) + ", but we now "
+                  + "want to add a mapping to " + scrubbedHash
+                  + ". Exiting");
+              System.exit(1);
+            }
+            descriptorMapping.put(originalHash, scrubbedHash);
+            if (haveExtraInfo != null) {
+              File out = new File(outDir + File.separator + "descriptors"
+                  + File.separator + scrubbedHash.charAt(0)
+                  + File.separator + scrubbedHash.charAt(1)
+                  + File.separator + scrubbedHash);
+              if (!out.exists()) {
+                BufferedWriter bw2 = new BufferedWriter(
+                    new FileWriter(out));
+                try {
+                  bw2.write(scrubbedDesc);
+                } finally {
+                  bw2.close();
+                }
+                writtenDescriptors.add(out);
+                String extraInfoHash = haveExtraInfo.toLowerCase();
+                File extrainfoFile = new File(outDir + File.separator
+                    + "extra-infos" + File.separator
+                    + extraInfoHash.charAt(0) + File.separator
+                    + extraInfoHash.charAt(1) + File.separator
+                    + extraInfoHash);
+                if (!extrainfoFile.exists()) {
+                  System.out.println("Extra-info descriptor '"
+                      + extrainfoFile + "' does not exist.");
+                  System.exit(1);
+                }
+                referencedExtraInfos.put(out, extrainfoFile);
+              }
+            }
+          } else if (line.startsWith("opt extra-info-digest ")) {
+            String originalExtraInfo = line.split(" ")[2].toLowerCase();
+            if (!extrainfoMapping.containsKey(originalExtraInfo)) {
+              notfound++;
+            } else {
+              found++;
+              original.append(line + "\n");
+              haveExtraInfo = extrainfoMapping.get(originalExtraInfo).
+                  toUpperCase();
+              scrubbed.append("opt extra-info-digest " + haveExtraInfo
+                  + "\n");
+            }
+          } else if (line.startsWith("reject ")
+              || line.startsWith("accept ")) {
+            /* If the descriptor had no contact line so far, add one now
+             * in order to put in the country code. */
+            if (!contactWritten) {
+              scrubbed.append("contact nobody at example dot " + country
+                  + "\n");
+              contactWritten = true;
+            }
+            original.append(line + "\n");
+            scrubbed.append(line + "\n");
+          } else if (line.startsWith("platform ")
+              || line.startsWith("opt protocols ")
+              || line.startsWith("published ")
+              || line.startsWith("uptime ")
+              || line.startsWith("bandwidth ")
+              || line.startsWith("opt hibernating ")
+              || line.equals("opt hidden-service-dir")
+              || line.equals("opt caches-extra-info")) {
+            original.append(line + "\n");
+            scrubbed.append(line + "\n");
+          } else if (line.startsWith("family ")) {
+            /* Hash family members given by fingerprint; keep the others
+             * unchanged. */
+            StringBuilder familyLine = new StringBuilder("family");
+            for (String s : line.substring(7).split(" ")) {
+              if (s.startsWith("$")) {
+                familyLine.append(" $" + DigestUtils.shaHex(
+                    Hex.decodeHex(s.substring(1).toCharArray())).
+                    toUpperCase());
+              } else {
+                familyLine.append(" " + s);
+              }
+            }
+            original.append(line + "\n");
+            scrubbed.append(familyLine.toString() + "\n");
+          } else if (line.startsWith("@purpose ")) {
+            continue;
+          } else if (line.startsWith("-----BEGIN ")
+              || line.equals("onion-key") || line.equals("signing-key")) {
+            skipCrypto = true;
+            original.append(line + "\n");
+          } else if (line.startsWith("-----END ")) {
+            skipCrypto = false;
+            original.append(line + "\n");
+          } else {
+            System.out.println("Unrecognized line '" + line
+                + "'. Exiting");
+            System.exit(1);
+          }
+        }
+      } finally {
+        br.close();
+      }
+    }
+    System.out.println("\nWrote " + writtenDescriptors.size()
+        + " bridge descriptors. While parsing, we found that we parsed "
+        + found + " extra-info identifiers before, but are missing "
+        + notfound + ". (The number of missing identifiers should be "
+        + "significantly smaller.)");
+
+    /* Step 3: sanitize the network statuses of the requested month and
+     * note which descriptors they reference. */
+    System.out.print("Parsing network statuses");
+    Set<File> referencedDescriptors = new HashSet<File>();
+    parsed = notfound = found = 0;
+    for (File file : statuses) {
+      if (parsed++ > statuses.size() / progressDivisor) {
+        System.out.print(".");
+        parsed = 0;
+      }
+      if (!file.getParent().substring(file.getParent().
+          indexOf("from-tonga-")).startsWith(currentYearAndMonth)) {
+        continue;
+      }
+      StringBuilder scrubbed = new StringBuilder();
+      BufferedReader br = new BufferedReader(new FileReader(file));
+      try {
+        String line = null;
+        boolean addSLine = false;
+        while ((line = br.readLine()) != null) {
+          if (line.startsWith("r ")) {
+            String[] parts = line.split(" ");
+            /* Identity and descriptor digest are base64-encoded without
+             * trailing padding; add it back before decoding. */
+            String bridgeIdentity = parts[2] + "==";
+            String hashedBridgeIdentity = Base64.encodeBase64String(
+                DigestUtils.sha(Base64.decodeBase64(bridgeIdentity))).
+                substring(0, 27);
+            String descIdentifier = parts[3] + "==";
+            String hexDescIdentifier = Hex.encodeHexString(
+                Base64.decodeBase64(descIdentifier));
+            if (!descriptorMapping.containsKey(hexDescIdentifier)) {
+              notfound++;
+              addSLine = false;
+            } else {
+              found++;
+              String refDesc = descriptorMapping.get(hexDescIdentifier).
+                  toLowerCase();
+              File descriptorFile = new File(outDir + File.separator
+                  + "descriptors" + File.separator + refDesc.charAt(0)
+                  + File.separator + refDesc.charAt(1) + File.separator
+                  + refDesc);
+              if (!descriptorFile.exists()) {
+                System.out.println("Descriptor file '"
+                    + descriptorFile.getAbsolutePath()
+                    + "' does not exist.");
+              }
+              String replacementDescIdentifier =
+                  Base64.encodeBase64String(Hex.decodeHex(
+                  descriptorMapping.get(hexDescIdentifier).
+                  toCharArray())).substring(0, 27);
+              scrubbed.append("r Unnamed " + hashedBridgeIdentity
+                  + " " + replacementDescIdentifier + " " + parts[4]
+                  + " " + parts[5] + " 127.0.0.1 " + parts[7] + " "
+                  + parts[8] + "\n");
+              addSLine = true;
+              referencedDescriptors.add(descriptorFile);
+            }
+          } else if (line.startsWith("s ")) {
+            /* Keep the flags only if the entry itself was kept. */
+            if (addSLine) {
+              scrubbed.append(line + "\n");
+            }
+          } else {
+            System.out.println("Unknown line: " + line);
+            System.exit(1);
+          }
+        }
+      } finally {
+        br.close();
+      }
+      /* Derive date and time of the status from the name of the snapshot
+       * directory (from-tonga-YYYY-MM-DDThhmmssZ). */
+      String timeString = file.getParent().substring(file.getParent().
+          indexOf("from-tonga-") + 11);
+      String[] date = timeString.substring(0, 10).split("-");
+      String time = timeString.substring(11, 17);
+      File dir = new File(outDir + File.separator + "statuses"
+          + File.separator + date[0] + File.separator + date[1]
+          + File.separator + date[2] + File.separator);
+      dir.mkdirs();
+      File out = new File(dir.getAbsolutePath() + File.separator + date[0]
+          + date[1] + date[2] + "-" + time + "-"
+          + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");
+      if (!out.exists()) {
+        BufferedWriter bw3 = new BufferedWriter(new FileWriter(out));
+        try {
+          bw3.write(scrubbed.toString());
+        } finally {
+          bw3.close();
+        }
+      }
+    }
+    System.out.println("\nWhile parsing, we found that we parsed "
+        + found + " bridge descriptors before, but are missing "
+        + notfound + ". (The number of missing identifiers should be "
+        + "significantly smaller.)");
+
+    /* Step 4: delete descriptors that no status references and
+     * extra-info descriptors that no remaining descriptor references. */
+    referencedExtraInfos.keySet().retainAll(referencedDescriptors);
+    Set<File> keptExtraInfos =
+        new HashSet<File>(referencedExtraInfos.values());
+    SortedSet<File> deleteDescriptors = new TreeSet<File>();
+    for (File e : writtenDescriptors) {
+      if (!referencedDescriptors.contains(e)) {
+        deleteDescriptors.add(e);
+      }
+    }
+    SortedSet<File> deleteExtraInfos = new TreeSet<File>();
+    for (File e : writtenExtrainfos) {
+      if (!keptExtraInfos.contains(e)) {
+        deleteExtraInfos.add(e);
+      }
+    }
+    System.out.println("Deleting " + deleteDescriptors.size()
+        + " unreferenced bridge descriptors and "
+        + deleteExtraInfos.size() + " extra-info descriptors (keeping "
+        + (writtenDescriptors.size() - deleteDescriptors.size())
+        + " bridge descriptors and " + (writtenExtrainfos.size()
+        - deleteExtraInfos.size()) + " extra-info descriptors).");
+    for (File e : deleteDescriptors) {
+      e.delete();
+    }
+    for (File e : deleteExtraInfos) {
+      e.delete();
+    }
+
+    long finished = System.currentTimeMillis();
+    System.out.println("Processing took " + ((finished - started) / 1000)
+        + " seconds.");
+  }
+}
+
Added: projects/archives/trunk/bridge-desc-sanitizer/HOWTO
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/HOWTO (rev 0)
+++ projects/archives/trunk/bridge-desc-sanitizer/HOWTO 2009-11-10 15:33:33 UTC (rev 20938)
@@ -0,0 +1,113 @@
+Bridge descriptor sanitizer
+
+---------------------------------------------------------------------------
+
+Introduction:
+
+The bridge authority Tonga keeps a list of bridges in order to serve bridge
+addresses and descriptors to its clients. Every half hour, Tonga copies a
+snapshot of the known bridge descriptors to moria where these descriptors
+are archived for later statistical analysis. As a guiding principle, the
+Tor project makes all data that it uses for statistical analysis available
+to the interested public, in order to maximize transparency towards the
+community. However, the bridge descriptors contain the IP addresses and
+other contact information of bridges. This information must not be made
+public, or it would defeat the purpose of bridges as non-public entry
+points into the Tor network. This script takes the half-hourly snapshots
+as input, removes all
+possibly sensitive information from the descriptors, and puts out the
+sanitized bridge descriptors that are safe to be published.
+
+---------------------------------------------------------------------------
+
+Processing steps:
+
+The following steps are taken to remove all potentially sensitive
+information from the bridge descriptors while keeping them useful for
+statistical analysis.
+
+1. Replace the bridge identity with its SHA1 value
+
+ Clients can request a bridge's current descriptor by sending its
+ identity string to the bridge authority. This is a feature to make
+ bridges on dynamic IP addresses useful. Therefore, the original
+ identities (and anything that could be used to derive them) need to be
+ removed from the descriptors. The bridge identity is replaced with its
+ SHA1 hash value. The idea is to have a consistent replacement that
+ remains stable over months or even years (without keeping a secret for a
+ keyed hash function).
+
+2. Remove all cryptographic keys and signatures
+
+ It would be straightforward to derive the bridge identity from the
+ bridge's public key. Replacing keys with newly generated ones seemed
+ unnecessary (and would require keeping state over months or years), so
+ all cryptographic objects are simply removed.
+
+3. Replace IP address with 127.0.0.1
+
+ Of course, the IP address needs to be removed, too. However, the IP
+ address is resolved to a country code first and the result written to
+ the contact line as "somebody at example dot de" for Germany, etc. The
+ ports are kept unchanged though.
+
+4. Replace contact information
+
+ If there is contact information in a descriptor, the contact line is
+ changed to "somebody at ...". If there is none, a contact line is added
+ saying "nobody at ..." in order to put in the country code.
+
+5. Replace nickname with Unnamed
+
+ A bridge nickname might give hints about the location of the bridge if
+ chosen without care; e.g. it might be very similar to the nicknames of
+ the operator's relays, which might be located on adjacent IP addresses.
+ All bridge nicknames are therefore replaced with the string Unnamed.
+
+Note that these processing steps only prevent people from learning about
+new bridge locations. People who already know a bridge identity or location
+can easily learn more about this bridge from the sanitized descriptors.
+This is useful for statistical analysis, e.g. to filter out bridges that
+have been running as relays before.
+
+---------------------------------------------------------------------------
+
+Quick Start:
+
+The following steps are necessary to process the half-hourly snapshots as
+collected by moria:
+
+- Install Java 5 or higher.
+
+- Download Apache Commons Codec 1.4 or higher for Base 64 and hex encoding
+ from http://commons.apache.org/codec/ and place the .jar (in the
+ following assumed to be commons-codec-1.4.jar) in the same directory as
+ this HOWTO file.
+
+- Copy the half-hourly snapshots named from-tonga-YYYY-MM-DDThhmmssZ.tar.gz
+ into a directory called data/ in the same directory as this HOWTO file.
+
+- Run ./extract-bridges.sh to extract the half-hourly snapshots in data/
+ to separate directories in the newly created subdirectory in/ .
+
+- Copy the geoip.txt from the Tor sources (from /src/config/) to the same
+ directory as this HOWTO file.
+
+- Compile the Java class using
+
+ $ javac -cp commons-codec-1.4.jar ConvertBridgeDescs.java
+
+- Run the script, providing it with the parameters it needs:
+
+ java -cp .:commons-codec-1.4.jar ConvertBridgeDescs
+ <input directory> <geoip.txt file>
+ <YYYY> <MM> <output directory>
+
+ Note that YYYY and MM specify the month that shall be processed. The other
+ descriptors in the input directory are ignored.
+
+ A sample invocation might be:
+
+ $ java -cp .:commons-codec-1.4.jar ConvertBridgeDescs in/ geoip.txt
+ 2008 10 out/
+
Added: projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
===================================================================
--- projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh (rev 0)
+++ projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh 2009-11-10 15:33:33 UTC (rev 20938)
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Extract every half-hourly snapshot data/from-tonga-*.tar.gz into its own
+# directory in/from-tonga-YYYY-MM-DDThhmmssZ/.
+
+# -p: do not fail if the directory is left over from an earlier run.
+mkdir -p "in"
+# Use a glob instead of parsing the output of ls, and quote all
+# expansions so that unusual file names cannot be split into words.
+for archive in data/from-tonga-*.tar.gz
+do
+  # Without any matching file the pattern is passed through unchanged.
+  [ -e "$archive" ] || continue
+  name=${archive##*/}
+  name=${name%.tar.gz}
+  mkdir -p "in/$name"
+  tar -C "in/$name" -xf "$archive"
+done
+
Property changes on: projects/archives/trunk/bridge-desc-sanitizer/extract-bridges.sh
___________________________________________________________________
Added: svn:executable
+ *