[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[or-cvs] r12771: Initial svn import of bridge disbursal code (/ bridgedb bridgedb/trunk bridgedb/trunk/lib bridgedb/trunk/lib/bridgedb)
Author: nickm
Date: 2007-12-11 18:21:20 -0500 (Tue, 11 Dec 2007)
New Revision: 12771
Added:
bridgedb/
bridgedb/branches/
bridgedb/tags/
bridgedb/trunk/
bridgedb/trunk/TODO
bridgedb/trunk/lib/
bridgedb/trunk/lib/bridgedb/
bridgedb/trunk/lib/bridgedb/Bridges.py
bridgedb/trunk/lib/bridgedb/Dist.py
bridgedb/trunk/lib/bridgedb/Main.py
Log:
Initial svn import of bridge disbursal code
Added: bridgedb/trunk/TODO
===================================================================
--- bridgedb/trunk/TODO (rev 0)
+++ bridgedb/trunk/TODO 2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,16 @@
+
+For dec:
+- better name
+- https frontend
+- email frontend
+- normalize email correctly
+- figure out which python version I need; document that.
+- break time into periods
+
+Later:
+- better area division logic
+- make all proxies get stuck in their own area.
+- implement hop
+- implement slightly nicer logging
+
+
Added: bridgedb/trunk/lib/bridgedb/Bridges.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Bridges.py (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Bridges.py 2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,304 @@
+#!/usr/bin/python
+
+import binascii
+import bisect
+import hashlib
+import hmac
+import socket
+import struct
+import time
+
+# Length of a hex-encoded bridge fingerprint, and of the raw identity
+# that such a fingerprint decodes to.
+HEX_FP_LEN = 40
+ID_LEN = 20
+
+# Length of a hex-encoded HMAC-SHA256 digest, and of the raw digest.
+HEX_DIGEST_LEN = 64
+DIGEST_LEN = 32
+
+def is_valid_ip(ip):
+    """Return True if socket.inet_aton() accepts the string 'ip' as an
+       IPv4 address, and False if it rejects it."""
+    try:
+        socket.inet_aton(ip)
+    except socket.error:
+        return False
+    return True
+
+def is_valid_fingerprint(fp):
+    """Return True iff 'fp' is exactly HEX_FP_LEN characters long and
+       decodes cleanly as hexadecimal."""
+    if len(fp) != HEX_FP_LEN:
+        return False
+    try:
+        # Decode rather than encode: hex-encoding succeeds on any input,
+        # so it cannot tell us whether fp is well-formed hex.
+        fromHex(fp)
+    except (TypeError, ValueError):
+        return False
+    return True
+
+# Short aliases: toHex encodes raw bytes as hex digits, and fromHex
+# decodes hex digits back into raw bytes.
+toHex = binascii.b2a_hex
+fromHex = binascii.a2b_hex
+
+def get_hmac(k,v):
+    """Return the raw HMAC-SHA256 digest of the value 'v' under the key 'k'."""
+    return hmac.new(k, v, hashlib.sha256).digest()
+
+def get_hmac_fn(k, hex=True):
+    """Return a function that computes the HMAC-SHA256 of its argument
+       under the key 'k'.  The returned function gives a hex string if
+       'hex' is true, and the raw digest otherwise."""
+    keyed = hmac.new(k, digestmod=hashlib.sha256)
+    def hmac_fn(v):
+        # Update a copy, so the keyed state can be reused on every call.
+        mac = keyed.copy()
+        mac.update(v)
+        return mac.hexdigest() if hex else mac.digest()
+    return hmac_fn
+
+def chopString(s, size):
+    """Generate successive slices of 's', each 'size' items long; the
+       last one is shorter when len(s) is not a multiple of 'size'."""
+    total = len(s)
+    for start in range(0, total, size):
+        end = start + size
+        yield s[start:end]
+
+class Bridge:
+    """A single bridge: its nickname, IP address, ORPort and identity.
+       The identity is kept in self.fingerprint as a hex string."""
+    def __init__(self, nickname, ip, orport, fingerprint=None, id_digest=None):
+        """Create a bridge identified either by 'fingerprint' (a hex string
+           of HEX_FP_LEN characters) or by 'id_digest' (the raw identity,
+           ID_LEN bytes long), but not both.  Raise TypeError if the
+           identity is missing or malformed."""
+        self.nickname = nickname
+        self.ip = ip
+        self.orport = orport
+        if id_digest is not None:
+            assert fingerprint is None
+            # A raw identity is ID_LEN bytes long: that is what getID()
+            # returns, and what hex-encodes to HEX_FP_LEN characters.
+            if len(id_digest) != ID_LEN:
+                raise TypeError("Bridge with invalid ID")
+            self.fingerprint = toHex(id_digest)
+        elif fingerprint is not None:
+            # Check the hex string itself: it must decode, and must
+            # decode to exactly ID_LEN bytes.
+            try:
+                raw_id = fromHex(fingerprint)
+            except (TypeError, ValueError):
+                raise TypeError("Bridge with invalid fingerprint")
+            if len(raw_id) != ID_LEN:
+                raise TypeError("Bridge with invalid fingerprint")
+            self.fingerprint = fingerprint.lower()
+        else:
+            raise TypeError("Bridge with no ID")
+
+    def getID(self):
+        """Return this bridge's identity in raw (non-hex) form."""
+        return fromHex(self.fingerprint)
+
+    def __repr__(self):
+        return "Bridge(%r,%r,%d,%r)"%(
+            self.nickname, self.ip, self.orport, self.fingerprint)
+
+    def getConfigLine(self):
+        """Return a "bridge IP:ORPort fingerprint" line for this bridge."""
+        return "bridge %s:%d %s" % (self.ip, self.orport, self.fingerprint)
+
+    def assertOK(self):
+        """Assert that the IP, fingerprint and ORPort are all well-formed."""
+        assert is_valid_ip(self.ip)
+        assert is_valid_fingerprint(self.fingerprint)
+        assert 1 <= self.orport <= 65535
+
+def parseDescFile(f, bridge_purpose='bridge'):
+    """Generate a Bridge for each complete router descriptor found in 'f',
+       an iterable of lines.  If 'bridge_purpose' is not None, yield only
+       descriptors whose "@purpose" annotation equals it."""
+    nickname = ip = orport = fingerprint = purpose = None
+
+    for raw in f:
+        line = raw.strip()
+        if line.startswith("opt "):
+            line = line[4:]
+
+        if line.startswith("@purpose "):
+            purpose = line.split()[1]
+        elif line.startswith("router "):
+            fields = line.split()
+            if len(fields) >= 4:
+                nickname, ip = fields[1], fields[2]
+                orport = int(fields[3])
+        elif line.startswith("fingerprint "):
+            fingerprint = line[len("fingerprint "):].replace(" ", "")
+        elif line.startswith("router-signature"):
+            # The signature line ends a descriptor: emit the bridge if
+            # we have everything, then start over for the next one.
+            wanted = bridge_purpose is None or purpose == bridge_purpose
+            if wanted and nickname and ip and orport and fingerprint:
+                b = Bridge(nickname, ip, orport, fingerprint)
+                b.assertOK()
+                yield b
+            nickname = ip = orport = fingerprint = purpose = None
+
+class BridgeHolder:
+    """Base class for objects that bridges can be inserted into."""
+    def insert(self, bridge):
+        """Add 'bridge' to this holder.  Subclasses must override this."""
+        # NotImplemented is a comparison sentinel, not an exception;
+        # the exception to raise here is NotImplementedError.
+        raise NotImplementedError
+
+    def assignmentsArePersistent(self):
+        """Return True.  Subclasses whose assignments should not be
+           remembered override this to return False."""
+        return True
+
+class BridgeRing(BridgeHolder):
+    """A collection of bridges arranged on a ring, each placed at the
+       HMAC of its identity under this ring's key."""
+    def __init__(self, key):
+        self.bridges = {}       # ring position -> Bridge
+        self.bridgesByID = {}   # raw bridge identity -> Bridge
+        self.hmac = get_hmac_fn(key, hex=False)
+        self.isSorted = False
+        self.sortedKeys = []    # ring positions; in order only if isSorted
+
+    def insert(self, bridge):
+        """Add 'bridge' to the ring, replacing any bridge already stored
+           at the same position."""
+        ident = bridge.getID()
+        where = self.hmac(ident)
+        if where not in self.bridges:
+            self.sortedKeys.append(where)
+            self.isSorted = False
+        self.bridges[where] = bridge
+        self.bridgesByID[ident] = bridge
+
+    def sort(self):
+        """Put self.sortedKeys in order, unless it already is."""
+        if self.isSorted:
+            return
+        self.sortedKeys.sort()
+        self.isSorted = True
+
+    def _getBridgeKeysAt(self, pos, N=1):
+        """Return the N ring positions at or after 'pos', wrapping around
+           the end of the ring.  If the ring holds N positions or fewer,
+           return all of them."""
+        assert len(pos) == DIGEST_LEN
+        keys = self.sortedKeys
+        if N >= len(keys):
+            return keys
+        if not self.isSorted:
+            self.sort()
+        start = bisect.bisect_left(keys, pos)
+        found = keys[start:start+N]
+        missing = N - len(found)
+        if missing > 0:
+            # wrap around as needed.
+            found.extend(keys[:missing])
+        assert len(found) == N
+        return found
+
+    def getBridges(self, pos, N=1):
+        """Return the bridges at the N ring positions at or after 'pos',
+           ordered by ring position."""
+        keys = self._getBridgeKeysAt(pos, N)
+        keys.sort()
+        result = []
+        for k in keys:
+            result.append(self.bridges[k])
+        return result
+
+    def getBridgeByID(self, fp):
+        """Return the bridge whose raw identity is 'fp', or None."""
+        return self.bridgesByID.get(fp)
+
+    def __len__(self):
+        return len(self.bridges)
+
+
+class LogDB:
+    """Wraps a dict-like database, writing a line to a log file for
+       every change made through the wrapper.  Each logged line starts
+       with the keyword 'kwd'."""
+    def __init__(self, kwd, db, logfile):
+        self._kwd = kwd
+        self._db = db
+        self._logfile = logfile
+    def __delitem__(self, k):
+        self._logfile.write("%s: del[%r]\n"%(self._kwd, k))
+        del self._db[k]
+    def __setitem__(self, k, v):
+        self._logfile.write("%s: [%r] = [%r]\n"%(self._kwd, k, v))
+        self._db[k] = v
+    def setdefault(self, k, v):
+        """Return the value stored for 'k'; if there is none, log and
+           store 'v', and return it."""
+        try:
+            return self._db[k]
+        except KeyError:
+            self._logfile.write("%s: [%r] = [%r]\n"%(self._kwd, k, v))
+            self._db[k] = v
+            return v
+    def __len__(self):
+        return len(self._db)
+    def __getitem__(self, k):
+        return self._db[k]
+    def has_key(self, k):
+        return self._db.has_key(k)
+    def get(self, k, v=None):
+        return self._db.get(k, v)
+    def keys(self):
+        return self._db.keys()
+    def rotate(self):
+        """Placeholder: this method was declared without a body.  Log
+           rotation is not implemented."""
+        raise NotImplementedError("LogDB.rotate is not implemented")
+
+class PrefixStore:
+    """A view of the dict-like 'store' limited to keys that start with
+       'prefix'.  Callers pass and receive keys without the prefix."""
+    def __init__(self, store, prefix):
+        self._d = store
+        self._p = prefix
+    def _key(self, k):
+        # Turn a caller's key into the key used in the underlying store.
+        return self._p + k
+    def __setitem__(self, k, v):
+        self._d[self._key(k)] = v
+    def __delitem__(self, k):
+        del self._d[self._key(k)]
+    def __getitem__(self, k):
+        return self._d[self._key(k)]
+    def has_key(self, k):
+        return self._d.has_key(self._key(k))
+    def get(self, k, v=None):
+        return self._d.get(self._key(k), v)
+    def setdefault(self, k, v):
+        return self._d.setdefault(self._key(k), v)
+    def keys(self):
+        """Return the keys that carry our prefix, with the prefix removed."""
+        skip = len(self._p)
+        result = []
+        for full in self._d.keys():
+            if full.startswith(self._p):
+                result.append(full[skip:])
+        return result
+
+class FixedBridgeSplitter(BridgeHolder):
+    """Divides bridges among a fixed list of holders, choosing the
+       holder for each bridge from an HMAC of the bridge's identity."""
+    def __init__(self, key, rings):
+        """'key' keys the HMAC; 'rings' is a list of BridgeHolders, which
+           is copied."""
+        self.hmac = get_hmac_fn(key, hex=True)
+        self.rings = rings[:]
+        for r in self.rings:
+            assert isinstance(r, BridgeHolder)
+
+    def insert(self, bridge):
+        """Insert 'bridge' into the holder selected by its identity."""
+        # The first 8 hex digits are the first 4 bytes of the digest.
+        digest = self.hmac(bridge.getID())
+        pos = int(digest[:8], 16)
+        which = pos % len(self.rings)
+        self.rings[which].insert(bridge)
+
+class UnallocatedHolder(BridgeHolder):
+    """A holder that does nothing with the bridges inserted into it, and
+       reports its assignments as non-persistent."""
+    def insert(self, bridge):
+        return None
+
+    def assignmentsArePersistent(self):
+        return False
+
+class BridgeTracker:
+    """Records, in two dict-like stores keyed by bridge identity, when
+       each bridge was first and most recently inserted."""
+    def __init__(self, firstSeenStore, lastSeenStore):
+        self.firstSeenStore = firstSeenStore
+        self.lastSeenStore = lastSeenStore
+
+    def insert(self, bridge):
+        """Note that 'bridge' has been seen at the current UTC minute."""
+        #XXXX is this really sane? Should we track minutes? hours?
+        ident = bridge.getID()
+        stamp = time.strftime("%Y-%m-%d %H:%M", time.gmtime())
+        # Last-seen is always overwritten; first-seen is written only
+        # when no earlier value exists.
+        self.lastSeenStore[ident] = stamp
+        self.firstSeenStore.setdefault(ident, stamp)
+
+class BridgeSplitter(BridgeHolder):
+    """Divides bridges among named holders in proportion to per-holder
+       weights, remembering in 'store' which holder each bridge went to
+       when that holder's assignments are persistent."""
+    def __init__(self, key, store):
+        """'key' keys the HMAC used to place bridges; 'store' is a
+           dict-like object mapping bridge identity to holder name."""
+        # A callable returning hex, so insert() can parse a prefix of it.
+        self.hmac = get_hmac_fn(key, hex=True)
+        self.store = store
+        self.ringsByName = {}
+        self.totalP = 0
+        self.pValues = []       # for each ring, the sum of earlier weights
+        self.rings = []         # ring names, in the order they were added
+        self.statsHolders = []
+
+    def addRing(self, ring, ringname, p=1):
+        """Add the BridgeHolder 'ring' under the name 'ringname', with
+           weight 'p' relative to the other rings."""
+        assert isinstance(ring, BridgeHolder)
+        self.ringsByName[ringname] = ring
+        self.pValues.append(self.totalP)
+        self.rings.append(ringname)
+        self.totalP += p
+
+    def addTracker(self, t):
+        """Add 't', whose insert() is called for every bridge inserted."""
+        self.statsHolders.append(t)
+
+    def insert(self, bridge):
+        """Pass 'bridge' to every tracker, then insert it into the ring
+           recorded for it in the store, or failing that into a ring
+           chosen by HMAC of its identity."""
+        assert self.rings
+        for s in self.statsHolders:
+            s.insert(bridge)
+        bridgeID = bridge.getID()
+        ringname = self.store.get(bridgeID, "")
+        ring = self.ringsByName.get(ringname)
+        if ring is not None:
+            ring.insert(bridge)
+            return
+
+        # No usable assignment on record: map the bridge to a number in
+        # [0, totalP) and find the ring whose weight range contains it.
+        n = int(self.hmac(bridgeID)[:8], 16) % self.totalP
+        pos = bisect.bisect_right(self.pValues, n) - 1
+        assert 0 <= pos < len(self.rings)
+        ringname = self.rings[pos]
+        ring = self.ringsByName[ringname]
+        if ring.assignmentsArePersistent():
+            self.store[bridgeID] = ringname
+        ring.insert(bridge)
+
+if __name__ == '__main__':
+    # Load every descriptor file named on the command line into a
+    # single ring.
+    import sys
+    br = BridgeRing("hello")
+    for fname in sys.argv[1:]:
+        f = open(fname)
+        try:
+            for bridge in parseDescFile(f):
+                br.insert(bridge)
+        finally:
+            # Close the file even if a descriptor fails to parse.
+            f.close()
+
Added: bridgedb/trunk/lib/bridgedb/Dist.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Dist.py (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Dist.py 2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,80 @@
+
+import Bridges
+import socket
+
+def uniformMap(ip):
+    """Map the IPv4 address 'ip' to an 'area': the first three bytes of
+       its packed 4-byte form."""
+    packed = socket.inet_aton(ip)
+    return packed[:3]
+
+
+class IPBasedDistributor(Bridges.BridgeHolder):
+    """Hands out bridges according to the requester's IP address.
+       Bridges are divided among 'nClusters' rings; each area of
+       addresses is mapped to one ring and to a position on it."""
+    def __init__(self, areaMapper, nClusters, key):
+        """'areaMapper' maps an IP string to an area string; 'key' is the
+           secret from which all of the HMAC keys used here are derived."""
+        self.areaMapper = areaMapper
+
+        self.rings = []
+        for n in xrange(nClusters):
+            key1 = Bridges.get_hmac(key, "Order-Bridges-In-Ring-%d"%n)
+            self.rings.append( Bridges.BridgeRing(key1) )
+
+        key2 = Bridges.get_hmac(key, "Assign-Bridges-To-Rings")
+        self.splitter = Bridges.FixedBridgeSplitter(key2, self.rings)
+
+        # Ring positions are raw digests: BridgeRing asserts that a
+        # position is DIGEST_LEN bytes long, so this must not be hex.
+        key3 = Bridges.get_hmac(key, "Order-Areas-In-Rings")
+        self.areaOrderHmac = Bridges.get_hmac_fn(key3, hex=False)
+
+        key4 = Bridges.get_hmac(key, "Assign-Areas-To-Rings")
+        self.areaClusterHmac = Bridges.get_hmac_fn(key4, hex=True)
+
+    def insert(self, bridge):
+        self.splitter.insert(bridge)
+
+    def getBridgesForIP(self, ip, epoch, N=1):
+        """Return up to N bridges for a request from 'ip' during the
+           time period 'epoch'."""
+        area = self.areaMapper(ip)
+
+        # Which bridge cluster should we look at?
+        h = int( self.areaClusterHmac(area)[:8], 16 )
+        clusterNum = h % len(self.rings)
+        ring = self.rings[clusterNum]
+
+        # Now get the bridge.
+        pos = self.areaOrderHmac("<%s>%s" % (epoch, area))
+        return ring.getBridges(pos, N)
+
+
+def normalizeEmail(addr):
+    """Return 'addr' with surrounding whitespace removed and all letters
+       lowercased."""
+    #XXXX make this better.
+    trimmed = addr.strip()
+    return trimmed.lower()
+
+class EmailBasedDistributor(Bridges.BridgeHolder):
+    """Hands out bridges according to the requester's email address,
+       recording in 'store' which bridges each address was given."""
+    def __init__(self, key, store):
+        """'key' is the secret from which the HMAC keys are derived;
+           'store' is a dict-like object mapping a normalized address to
+           the concatenated raw identities of the bridges it was given."""
+        # Ring positions are raw digests: BridgeRing asserts that a
+        # position is DIGEST_LEN bytes long, so this must not be hex.
+        key1 = Bridges.get_hmac(key, "Map-Addresses-To-Ring")
+        self.emailHmac = Bridges.get_hmac_fn(key1, hex=False)
+
+        key2 = Bridges.get_hmac(key, "Order-Bridges-In-Ring")
+        self.ring = Bridges.BridgeRing(key2)
+        self.store = store
+
+    def insert(self, bridge):
+        self.ring.insert(bridge)
+
+    def getBridgesForEmail(self, emailaddress, epoch, N=1):
+        """Return the bridges for 'emailaddress'.  An address seen before
+           gets those of its recorded bridges that are still in the ring;
+           a new address gets up to N bridges chosen by HMAC of the
+           address and 'epoch', and that choice is recorded."""
+        emailaddress = normalizeEmail(emailaddress)
+        if self.store.has_key(emailaddress):
+            result = []
+            ids = self.store[emailaddress]
+            for bridgeID in Bridges.chopString(ids, Bridges.ID_LEN):
+                b = self.ring.getBridgeByID(bridgeID)
+                if b is not None:
+                    result.append(b)
+            return result
+
+        pos = self.emailHmac("<%s>%s" % (epoch, emailaddress))
+        result = self.ring.getBridges(pos, N)
+        memo = "".join([b.getID() for b in result])
+        self.store[emailaddress] = memo
+        return result
+
Added: bridgedb/trunk/lib/bridgedb/Main.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Main.py (rev 0)
+++ bridgedb/trunk/lib/bridgedb/Main.py 2007-12-11 23:21:20 UTC (rev 12771)
@@ -0,0 +1,80 @@
+
+import anydbm
+
+import Bridges
+import Dist
+import os
+import sys
+
+# Default settings.  BRIDGE_FILES is a list of descriptor files to read;
+# DB_FILE, DB_LOG_FILE and MASTER_KEY_FILE each name a single file, since
+# startup() and getKey() pass them straight to the file-opening calls.
+# The *_SHARE values are the relative weights given to each distributor.
+CONFIG = dict(
+    BRIDGE_FILES = [ "./cached-descriptors", "./cached-descriptors.new" ],
+    BRIDGE_PURPOSE = "bridge",
+    DB_FILE = "./bridgedist",
+    DB_LOG_FILE = "./bridgedist.log",
+    HTTPS_DIST = True,
+    EMAIL_DIST = True,
+    N_IP_CLUSTERS = 8,
+    MASTER_KEY_FILE = "./secret_key",
+    HTTPS_SHARE=10,
+    EMAIL_SHARE=10,
+    RESERVED_SHARE=2
+    )
+
+def getKey(fname):
+    """Return the contents of the file 'fname'.  If it cannot be opened,
+       generate 32 random bytes, store them in 'fname' with owner-read-only
+       permissions, and return them."""
+    import stat
+    try:
+        f = open(fname, 'rb')
+    except IOError:
+        k = os.urandom(32)
+        # O_CREAT is required: the usual reason for getting here is that
+        # the file does not exist yet.  O_BINARY exists only on Windows.
+        flags = (os.O_WRONLY | os.O_CREAT | os.O_TRUNC |
+                 getattr(os, "O_BINARY", 0))
+        fd = os.open(fname, flags, stat.S_IRUSR)
+        try:
+            os.write(fd, k)
+        finally:
+            os.close(fd)
+    else:
+        try:
+            k = f.read()
+        finally:
+            f.close()
+
+    return k
+
+def load(cfg, splitter):
+    """Read every file listed in cfg.BRIDGE_FILES and insert each bridge
+       with purpose cfg.BRIDGE_PURPOSE into 'splitter'."""
+    for fname in cfg.BRIDGE_FILES:
+        f = open(fname, 'r')
+        try:
+            for bridge in Bridges.parseDescFile(f, cfg.BRIDGE_PURPOSE):
+                splitter.insert(bridge)
+        finally:
+            # Close the file even if a descriptor fails to parse.
+            f.close()
+
+def startup(cfg):
+    """Set up the bridge database described by 'cfg': load or create the
+       master key, open the (optionally logged) store, build a splitter
+       with the enabled distributors, and load the bridge files into it."""
+    key = getKey(cfg.MASTER_KEY_FILE)
+
+    store = anydbm.open(cfg.DB_FILE, "c", 0600)
+    if cfg.DB_LOG_FILE:
+        dblogfile = open(cfg.DB_LOG_FILE, "a+", 0)
+        store = Bridges.LogDB("db", store, dblogfile)
+
+    splitter = Bridges.BridgeSplitter(Bridges.get_hmac(key, "Splitter-Key"),
+                                      Bridges.PrefixStore(store, "sp|"))
+
+    if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
+        ipDistributor = Dist.IPBasedDistributor(
+            Dist.uniformMap,
+            cfg.N_IP_CLUSTERS,
+            Bridges.get_hmac(key, "HTTPS-IP-Dist-Key"))
+        splitter.addRing(ipDistributor, "https", cfg.HTTPS_SHARE)
+
+    if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
+        emailDistributor = Dist.EmailBasedDistributor(
+            Bridges.get_hmac(key, "Email-Dist-Key"),
+            Bridges.PrefixStore(store, "em|"))
+        splitter.addRing(emailDistributor, "email", cfg.EMAIL_SHARE)
+
+    if cfg.RESERVED_SHARE:
+        splitter.addRing(Bridges.UnallocatedHolder(),
+                         "unallocated",
+                         cfg.RESERVED_SHARE)
+
+    stats = Bridges.BridgeTracker(Bridges.PrefixStore(store, "fs"),
+                                  Bridges.PrefixStore(store, "ls"))
+    splitter.addTracker(stats)
+
+    load(cfg, splitter)
+
+    # XXXX create twisted listeners.
+