[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [collector/master] Separate parsing and sanitizing steps for bridge descriptors.
commit d5aba97f9b6c4ee74735b183552b8435e5e0661b
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Fri Oct 27 19:26:57 2017 +0200
Separate parsing and sanitizing steps for bridge descriptors.
First step towards implementing #20549.
---
.../SanitizedBridgeDescriptorBuilder.java | 54 +++++
.../bridgedescs/SanitizedBridgesWriter.java | 240 +++++++++------------
2 files changed, 161 insertions(+), 133 deletions(-)
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
new file mode 100644
index 0000000..174a5ae
--- /dev/null
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgeDescriptorBuilder.java
@@ -0,0 +1,54 @@
+package org.torproject.collector.bridgedescs;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Builder for sanitized bridge descriptors.
+ *
+ * <p>This builder class can be used while parsing and sanitizing an original
+ * bridge descriptor. It accepts already sanitized {@code String}s and
+ * {@code StringBuilder}s as placeholders for parts that can only be sanitized
+ * after finishing the parsing step.</p> */
+class SanitizedBridgeDescriptorBuilder {
+
+ private List<StringBuilder> descriptorParts;
+
+ private StringBuilder lastDescriptorPart;
+
+ SanitizedBridgeDescriptorBuilder() {
+ this.descriptorParts = new ArrayList<>();
+ this.lastDescriptorPart = new StringBuilder();
+ this.descriptorParts.add(this.lastDescriptorPart);
+ }
+
+ SanitizedBridgeDescriptorBuilder append(String sanitizedString) {
+ this.lastDescriptorPart.append(sanitizedString);
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder append(StringBuilder placeholder) {
+ this.descriptorParts.add(placeholder);
+ this.lastDescriptorPart = new StringBuilder();
+ this.descriptorParts.add(this.lastDescriptorPart);
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder space() {
+ this.lastDescriptorPart.append(' ');
+ return this;
+ }
+
+ SanitizedBridgeDescriptorBuilder newLine() {
+ this.lastDescriptorPart.append('\n');
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder fullDescriptor = new StringBuilder();
+ for (StringBuilder descriptorPart : this.descriptorParts) {
+ fullDescriptor.append(descriptorPart);
+ }
+ return fullDescriptor.toString();
+ }
+}
diff --git a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
index e257245..1ef1d60 100644
--- a/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
+++ b/src/main/java/org/torproject/collector/bridgedescs/SanitizedBridgesWriter.java
@@ -34,6 +34,7 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
@@ -671,23 +672,20 @@ public class SanitizedBridgesWriter extends CollecTorMain {
}
/* Parse descriptor to generate a sanitized version. */
- String scrubbedDesc = null;
+ String address = null;
String published = null;
+ byte[] fingerprintBytes = null;
+ StringBuilder scrubbedAddress = null;
+ Map<StringBuilder, String> scrubbedTcpPorts = new HashMap<>();
+ Map<StringBuilder, String> scrubbedIpAddressesAndTcpPorts = new HashMap<>();
String masterKeyEd25519FromIdentityEd25519 = null;
- try {
- BufferedReader br = new BufferedReader(new StringReader(
- new String(data, "US-ASCII")));
- StringBuilder scrubbed = new StringBuilder();
- String line = null;
- byte[] fingerprintBytes = null;
- String hashedBridgeIdentity = null;
- String address = null;
- String routerLine = null;
- String scrubbedRouterLine = null;
- String scrubbedAddress = null;
+ SanitizedBridgeDescriptorBuilder scrubbed =
+ new SanitizedBridgeDescriptorBuilder();
+ try (BufferedReader br = new BufferedReader(new StringReader(
+ new String(data, "US-ASCII")))) {
+ scrubbed.append(Annotation.BridgeServer.toString());
+ String line;
String masterKeyEd25519 = null;
- List<String> orAddresses = null;
- List<String> scrubbedOrAddresses = null;
boolean skipCrypto = false;
while ((line = br.readLine()) != null) {
@@ -706,15 +704,26 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
address = parts[2];
- routerLine = line;
-
- /* Store or-address parts in a list and sanitize them when we have
- * read the fingerprint. */
+ scrubbedAddress = new StringBuilder();
+ StringBuilder scrubbedOrPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedOrPort, parts[3]);
+ StringBuilder scrubbedDirPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedDirPort, parts[4]);
+ StringBuilder scrubbedSocksPort = new StringBuilder();
+ scrubbedTcpPorts.put(scrubbedSocksPort, parts[5]);
+ scrubbed.append("router ").append(parts[1]).space()
+ .append(scrubbedAddress).space()
+ .append(scrubbedOrPort).space()
+ .append(scrubbedDirPort).space()
+ .append(scrubbedSocksPort).newLine();
+
+ /* Store or-address and sanitize it when we have read the fingerprint
+ * and descriptor publication time. */
} else if (line.startsWith("or-address ")) {
- if (orAddresses == null) {
- orAddresses = new ArrayList<>();
- }
- orAddresses.add(line.substring("or-address ".length()));
+ String orAddress = line.substring("or-address ".length());
+ StringBuilder scrubbedOrAddress = new StringBuilder();
+ scrubbedIpAddressesAndTcpPorts.put(scrubbedOrAddress, orAddress);
+ scrubbed.append("or-address ").append(scrubbedOrAddress).newLine();
/* Parse the publication time to see if we're still inside the
* sanitizing interval. */
@@ -735,21 +744,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
this.haveWarnedAboutInterval = true;
}
}
- if (null != fingerprintBytes) {
- /* We have read both published and fingerprint lines that we need to
- * scrub the bridge's address which we might need to scrub reject
- * lines. */
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- }
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* Parse the fingerprint to determine the hashed bridge
* identity. */
@@ -759,91 +754,23 @@ public class SanitizedBridgesWriter extends CollecTorMain {
? "opt fingerprint".length() : "fingerprint".length())
.replaceAll(" ", "").toLowerCase();
fingerprintBytes = Hex.decodeHex(fingerprint.toCharArray());
- hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
+ String hashedBridgeIdentity = DigestUtils.sha1Hex(fingerprintBytes)
.toLowerCase();
- if (null != published) {
- /* We have read both published and fingerprint lines that we need to
- * scrub the bridge's address which we might need to scrub reject
- * lines. */
- try {
- scrubbedAddress = scrubIpv4Address(address, fingerprintBytes,
- published);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
- }
- scrubbed.append((line.startsWith("opt ") ? "opt " : "")
- + "fingerprint");
+ scrubbed.append(line.startsWith("opt ") ? "opt " : "")
+ .append("fingerprint");
for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++) {
- scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i,
+ scrubbed.space().append(hashedBridgeIdentity.substring(4 * i,
4 * (i + 1)).toUpperCase());
}
- scrubbed.append("\n");
+ scrubbed.newLine();
/* Replace the contact line (if present) with a generic one. */
} else if (line.startsWith("contact ")) {
- scrubbed.append("contact somebody\n");
+ scrubbed.append("contact somebody").newLine();
/* When we reach the signature, we're done. Write the sanitized
* descriptor to disk below. */
} else if (line.startsWith("router-signature")) {
-
- /* Write the scrubbed "router" line now based on the "router",
- * "fingerprint", and "published" lines that we read before. Also
- * scrub any "or-address" lines. */
- if (null == routerLine || null == fingerprintBytes
- || null == published) {
- logger.warn("Missing either of the following lines that are "
- + "required to sanitize this server bridge descriptor: "
- + "\"router\", \"fingerprint\", \"published\". Skipping "
- + "descriptor.");
- return;
- }
- try {
- if (orAddresses != null) {
- scrubbedOrAddresses = new ArrayList<>();
- for (String orAddress : orAddresses) {
- String scrubbedOrAddress = scrubOrAddress(orAddress,
- fingerprintBytes, published);
- if (scrubbedOrAddress != null) {
- scrubbedOrAddresses.add(scrubbedOrAddress);
- } else {
- logger.warn("Invalid address in line "
- + "'or-address " + orAddress + "' in bridge server "
- + "descriptor. Skipping line!");
- }
- }
- }
- String[] routerLineParts = routerLine.split(" ");
- String nickname = routerLineParts[1];
- String scrubbedOrPort = this.scrubTcpPort(routerLineParts[3],
- fingerprintBytes, published);
- String scrubbedDirPort = this.scrubTcpPort(routerLineParts[4],
- fingerprintBytes, published);
- String scrubbedSocksPort = this.scrubTcpPort(
- routerLineParts[5], fingerprintBytes, published);
- scrubbedRouterLine = String.format("router %s %s %s %s %s%n",
- nickname, scrubbedAddress, scrubbedOrPort,
- scrubbedDirPort, scrubbedSocksPort);
- } catch (IOException e) {
- /* There's a persistence problem, so we shouldn't scrub more
- * IP addresses in this execution. */
- this.persistenceProblemWithSecrets = true;
- return;
- }
-
- /* Put together the scrubbed descriptor from "router" to the newline
- * before the original "router-signature" line. */
- scrubbedDesc = scrubbedRouterLine;
- if (scrubbedOrAddresses != null) {
- for (String scrubbedOrAddress : scrubbedOrAddresses) {
- scrubbedDesc += "or-address " + scrubbedOrAddress + "\n";
- }
- }
- scrubbedDesc += scrubbed.toString();
break;
/* Replace extra-info digest with the hashed digest of the
@@ -860,7 +787,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ "expected: '" + line + "'. Skipping descriptor.");
return;
}
- scrubbed.append("extra-info-digest " + DigestUtils.sha1Hex(
+ scrubbed.append("extra-info-digest ").append(DigestUtils.sha1Hex(
Hex.decodeHex(parts[1].toCharArray())).toUpperCase());
if (parts.length > 2) {
if (!Base64.isBase64(parts[2])) {
@@ -868,21 +795,21 @@ public class SanitizedBridgesWriter extends CollecTorMain {
+ line + "'. Skipping descriptor.");
return;
}
- scrubbed.append(" " + Base64.encodeBase64String(
+ scrubbed.space().append(Base64.encodeBase64String(
DigestUtils.sha256(Base64.decodeBase64(parts[2])))
.replaceAll("=", ""));
}
- scrubbed.append("\n");
+ scrubbed.newLine();
/* Possibly sanitize reject lines if they contain the bridge's own
* IP address. */
} else if (line.startsWith("reject ")) {
if (address != null && line.startsWith("reject " + address)) {
- scrubbed.append("reject " + scrubbedAddress
- + line.substring("reject ".length() + address.length())
- + "\n");
+ scrubbed.append("reject ").append(scrubbedAddress)
+ .append(line.substring("reject ".length() + address.length()))
+ .newLine();
} else {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
}
/* Extract master-key-ed25519 from identity-ed25519. */
@@ -907,8 +834,8 @@ public class SanitizedBridgesWriter extends CollecTorMain {
DigestUtils.sha256(Base64.decodeBase64(
masterKeyEd25519FromIdentityEd25519 + "=")))
.replaceAll("=", "");
- scrubbed.append("master-key-ed25519 " + sha256MasterKeyEd25519
- + "\n");
+ scrubbed.append("master-key-ed25519 ").append(sha256MasterKeyEd25519)
+ .newLine();
if (masterKeyEd25519 != null && !masterKeyEd25519.equals(
masterKeyEd25519FromIdentityEd25519)) {
logger.warn("Mismatch between identity-ed25519 and "
@@ -948,7 +875,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
|| line.startsWith("ipv6-policy ")
|| line.equals("tunnelled-dir-server")
|| line.startsWith("bridge-distribution-request ")) {
- scrubbed.append(line + "\n");
+ scrubbed.append(line).newLine();
/* Replace node fingerprints in the family line with their hashes
* and leave nicknames unchanged. */
@@ -956,13 +883,13 @@ public class SanitizedBridgesWriter extends CollecTorMain {
StringBuilder familyLine = new StringBuilder("family");
for (String s : line.substring(7).split(" ")) {
if (s.startsWith("$")) {
- familyLine.append(" $" + DigestUtils.sha1Hex(Hex.decodeHex(
+ familyLine.append(" $").append(DigestUtils.sha1Hex(Hex.decodeHex(
s.substring(1).toCharArray())).toUpperCase());
} else {
- familyLine.append(" " + s);
+ familyLine.append(" ").append(s);
}
}
- scrubbed.append(familyLine.toString() + "\n");
+ scrubbed.append(familyLine.toString()).newLine();
/* Skip the purpose line that the bridge authority adds to its
* cached-descriptors file. */
@@ -1003,7 +930,53 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
- /* Determine filename of sanitized server descriptor. */
+ /* Sanitize the parts that we couldn't sanitize earlier. */
+ if (null == address || null == fingerprintBytes
+ || null == published) {
+ logger.warn("Missing either of the following lines that are "
+ + "required to sanitize this server bridge descriptor: "
+ + "\"router\", \"fingerprint\", \"published\". Skipping "
+ + "descriptor.");
+ return;
+ }
+ try {
+ String scrubbedAddressString = scrubIpv4Address(address, fingerprintBytes,
+ published);
+ if (null == scrubbedAddressString) {
+ logger.warn("Invalid IP address in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return;
+ }
+ scrubbedAddress.append(scrubbedAddressString);
+ for (Map.Entry<StringBuilder, String> e
+ : scrubbedIpAddressesAndTcpPorts.entrySet()) {
+ String scrubbedOrAddress = scrubOrAddress(e.getValue(),
+ fingerprintBytes, published);
+ if (null == scrubbedOrAddress) {
+ logger.warn("Invalid IP address or TCP port in \"or-address\" line "
+ + "in bridge server descriptor. Skipping descriptor.");
+ return;
+ }
+ e.getKey().append(scrubbedOrAddress);
+ }
+ for (Map.Entry<StringBuilder, String> e : scrubbedTcpPorts.entrySet()) {
+ String scrubbedTcpPort = scrubTcpPort(e.getValue(), fingerprintBytes,
+ published);
+ if (null == scrubbedTcpPort) {
+ logger.warn("Invalid TCP port in \"router\" line in bridge server "
+ + "descriptor. Skipping descriptor.");
+ return;
+ }
+ e.getKey().append(scrubbedTcpPort);
+ }
+ } catch (IOException exception) {
+ /* There's a persistence problem, so we shouldn't scrub more IP addresses
+ * or TCP ports in this execution. */
+ this.persistenceProblemWithSecrets = true;
+ return;
+ }
+
+ /* Determine digest(s) of sanitized server descriptor. */
String descriptorDigest = null;
try {
String ascii = new String(data, "US-ASCII");
@@ -1048,6 +1021,14 @@ public class SanitizedBridgesWriter extends CollecTorMain {
return;
}
}
+ if (null != descriptorDigestSha256Base64) {
+ scrubbed.append("router-digest-sha256 ")
+ .append(descriptorDigestSha256Base64).newLine();
+ }
+ scrubbed.append("router-digest ").append(descriptorDigest.toUpperCase())
+ .newLine();
+
+ /* Determine filename of sanitized server descriptor. */
String dyear = published.substring(0, 4);
String dmonth = published.substring(5, 7);
File tarballFile = new File(
@@ -1073,14 +1054,7 @@ public class SanitizedBridgesWriter extends CollecTorMain {
outputFile.getParentFile().mkdirs();
BufferedWriter bw = new BufferedWriter(new FileWriter(
outputFile, appendToFile));
- bw.write(Annotation.BridgeServer.toString());
- bw.write(scrubbedDesc);
- if (descriptorDigestSha256Base64 != null) {
- bw.write("router-digest-sha256 " + descriptorDigestSha256Base64
- + "\n");
- }
- bw.write("router-digest " + descriptorDigest.toUpperCase()
- + "\n");
+ bw.write(scrubbed.toString());
bw.close();
}
} catch (ConfigurationException | IOException e) {
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits