[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r12812: Fix bugs to the point where everything in bridgedb works; ch (in bridgedb/trunk: . lib/bridgedb)



Author: nickm
Date: 2007-12-14 15:32:50 -0500 (Fri, 14 Dec 2007)
New Revision: 12812

Added:
   bridgedb/trunk/bridgedb.conf
Modified:
   bridgedb/trunk/
   bridgedb/trunk/TODO
   bridgedb/trunk/lib/bridgedb/Bridges.py
   bridgedb/trunk/lib/bridgedb/Dist.py
   bridgedb/trunk/lib/bridgedb/Main.py
   bridgedb/trunk/lib/bridgedb/Server.py
Log:
 r17150@catbus:  nickm | 2007-12-14 15:32:46 -0500
 Fix bugs to the point where everything in bridgedb works; check in an initial configuration file.



Property changes on: bridgedb/trunk
___________________________________________________________________
 svk:merge ticket from /bridgedb/trunk [r17150] on 8246c3cf-6607-4228-993b-4d95d33730f1

Modified: bridgedb/trunk/TODO
===================================================================
--- bridgedb/trunk/TODO	2007-12-14 19:11:32 UTC (rev 12811)
+++ bridgedb/trunk/TODO	2007-12-14 20:32:50 UTC (rev 12812)
@@ -2,6 +2,10 @@
 For dec:
 - reload bridges on sighup
 - write a README
+- make the configuration logic a little more sensible.
+- proper logging
+- check that incoming IP of email is sane.
+- check more email headers for sanity
 
 Later:
 - document stuff better
@@ -13,4 +17,3 @@
 - decent template for web interface
 - decent template for mail interface
 - implement 'help' command
- 
\ No newline at end of file

Added: bridgedb/trunk/bridgedb.conf
===================================================================
--- bridgedb/trunk/bridgedb.conf	                        (rev 0)
+++ bridgedb/trunk/bridgedb.conf	2007-12-14 20:32:50 UTC (rev 12812)
@@ -0,0 +1,82 @@
+
+#==========
+# General-purpose options.
+
+# We chdir to this directory when we start; all files with relative
+# pathnames are created under this directory
+RUN_IN_DIR = "~/run/"
+
+# Files from which we read descriptors on start and SIGHUP
+# XXXX SIGHUP not implemented.
+BRIDGE_FILES = [ "./cached-descriptors", "./cached-descriptors.new" ]
+
+# Only consider routers whose purpose matches this string.
+BRIDGE_PURPOSE = "bridge"
+# File to store persistent info in.
+DB_FILE = "./bridgedist.db"
+# File to log changes to persistent info in.  For debugging and bugfixing.
+DB_LOG_FILE = "./bridgedist.log"
+# File in which we store our secret HMAC root key.
+MASTER_KEY_FILE = "./secret_key"
+
+# How many clusters do we group IPs in when distributing bridges based on IP?
+N_IP_CLUSTERS = 4
+
+#==========
+# Options related to HTTPS
+
+# True if we are enabling distribution via HTTP or HTTPS; False otherwise.
+HTTPS_DIST = True
+# What proportion of bridges do we allocate to HTTP distribution?  See
+# EMAIL_SHARE and RESERVED_SHARE.
+HTTPS_SHARE=10
+# An IP address (form "1.2.3.4") where we listen for HTTPS connections.
+# "None" to listen on the default interface.
+HTTPS_BIND_IP=None
+# Port to listen on for incoming HTTPS connections
+HTTPS_PORT=6789
+# Certificate file
+HTTPS_CERT_FILE="cert"
+# Private key file.
+HTTPS_KEY_FILE="privkey.pem"
+# IP and port to listen on for unencrypted HTTP connections. Debugging only.
+HTTP_UNENCRYPTED_BIND_IP=None
+HTTP_UNENCRYPTED_PORT=None
+# How many bridges do we give back in an answer?
+HTTPS_N_BRIDGES_PER_ANSWER=2
+
+#==========
+# Options related to Email
+
+# True if we are enabling distribution via Email; False otherwise.
+EMAIL_DIST = True
+# What proportion of bridges do we allocate to Email distribution?  See
+# HTTPS_SHARE and RESERVED_SHARE.
+EMAIL_SHARE=10
+# Canonical versions of domains that we will reply to.
+EMAIL_DOMAINS = [ "gmail.com", "yahoo.com" ]
+# Map from unofficial domain to canonical domain.
+EMAIL_DOMAIN_MAP = { "mail.google.com" : "gmail.com",
+                     "googlemail.com" : "gmail.com",
+                     }
+# If there are any IPs in this list, only allow incoming connections from
+# those IPs.
+EMAIL_RESTRICT_IPS=[]
+# IP and port to listen on for email connections. Debugging only.
+EMAIL_BIND_IP=None
+EMAIL_PORT=6725
+# How many bridges do we give back in an answer?
+EMAIL_N_BRIDGES_PER_ANSWER=2
+
+#==========
+# Options related to unallocated bridges.
+
+# We split bridges into a group accessible by HTTPS queries; a group
+# accessible by email queries; and a group that we don't assign to any
+# query mechanism.  Once a bridge is assigned to either of the first
+# two groups, it stays there persistently.  The bridges are allocated
+# to these groups in a proportion of
+#   HTTPS_SHARE : EMAIL_SHARE : RESERVED_SHARE
+RESERVED_SHARE=2
+
+

Modified: bridgedb/trunk/lib/bridgedb/Bridges.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Bridges.py	2007-12-14 19:11:32 UTC (rev 12811)
+++ bridgedb/trunk/lib/bridgedb/Bridges.py	2007-12-14 20:32:50 UTC (rev 12812)
@@ -50,7 +50,7 @@
     if len(fp) != HEX_FP_LEN:
         return False
     try:
-        toHex(fp)
+        fromHex(fp)
     except TypeError:
         return False
     else:
@@ -89,8 +89,9 @@
                 raise TypeError("Bridge with invalid ID")
             self.fingerprint = toHex(id_digest)
         elif fingerprint is not None:
-            if not is_valid_fingerprint(fromHex(fingerprint)):
-                raise TypeError("Bridge with invalid fingerprint")
+            if not is_valid_fingerprint(fingerprint):
+                raise TypeError("Bridge with invalid fingerprint (%r)"%
+                                fingerprint)
             self.fingerprint = fingerprint.lower()
         else:
             raise TypeError("Bridge with no ID")
@@ -153,6 +154,9 @@
         self.isSorted = False
         self.sortedKeys = []
 
+    def __len__(self):
+        return len(self.bridgesByID)
+
     def insert(self, bridge):
         id = bridge.getID()
         pos = self.hmac(id)
@@ -243,9 +247,9 @@
         n = len(self._p)
         return [ k[n:] for k in self._d.keys() if k.startswith(self._p) ]
 
-def FixedBridgeSplitter(BridgeHolder):
+class FixedBridgeSplitter(BridgeHolder):
     def __init__(self, key, rings):
-        self.hmac = get_mac_fn(key, hex=True)
+        self.hmac = get_hmac_fn(key, hex=True)
         self.rings = rings[:]
         for r in self.rings:
             assert(isinstance(r, BridgeHolder))
@@ -255,8 +259,15 @@
         digest = self.hmac(bridge.getID())
         pos = long( digest[:8], 16 )
         which = pos % len(self.rings)
-        self.ring[which].insert(bridge)
+        self.rings[which].insert(bridge)
 
+    def __len__(self):
+        n = 0
+        for r in self.rings:
+            n += len(r)
+        return n
+
+
 class UnallocatedHolder(BridgeHolder):
     def insert(self, bridge):
         pass
@@ -278,9 +289,9 @@
         # The first-seen time only gets updated if it wasn't already set.
         self.firstSeenStore.setdefault(bridgeID, now)
 
-def BridgeSplitter(BridgeHolder):
+class BridgeSplitter(BridgeHolder):
     def __init__(self, key, store):
-        self.hmac = hmac.new(key, digestmod=DIGESTMOD)
+        self.hmac = get_hmac_fn(key, hex=True)
         self.store = store
         self.ringsByName = {}
         self.totalP = 0
@@ -288,6 +299,12 @@
         self.rings = []
         self.statsHolders = []
 
+    def __len__(self):
+        n = 0
+        for r in self.rings:
+            n += len(r)
+        return n
+
     def addRing(self, ring, ringname, p=1):
         assert isinstance(ring, BridgeHolder)
         self.ringsByName[ringname] = ring
@@ -310,7 +327,7 @@
         else:
             pos = self.hmac(bridgeID)
             n = int(pos[:8], 16) % self.totalP
-            pos = bisect.bisect_right(self.pValues, p) - 1
+            pos = bisect.bisect_right(self.pValues, n) - 1
             assert 0 <= pos < len(self.rings)
             ringname = self.rings[pos]
             ring = self.ringsByName.get(ringname)

Modified: bridgedb/trunk/lib/bridgedb/Dist.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Dist.py	2007-12-14 19:11:32 UTC (rev 12811)
+++ bridgedb/trunk/lib/bridgedb/Dist.py	2007-12-14 20:32:50 UTC (rev 12812)
@@ -29,10 +29,10 @@
         self.splitter = bridgedb.Bridges.FixedBridgeSplitter(key2, self.rings)
 
         key3 = bridgedb.Bridges.get_hmac(key, "Order-Areas-In-Rings")
-        self.areaOrderHmac = bridgedb.Bridges.get_hmac_fn(key3, hex=True)
+        self.areaOrderHmac = bridgedb.Bridges.get_hmac_fn(key3, hex=False)
 
         key4 = bridgedb.Bridges.get_hmac(key, "Assign-Areas-To-Rings")
-        self.areaClusterHmac = bridgedb.Bridges.get_hmac_fun(key4, hex=True)
+        self.areaClusterHmac = bridgedb.Bridges.get_hmac_fn(key4, hex=True)
 
     def insert(self, bridge):
         self.splitter.insert(bridge)
@@ -125,10 +125,10 @@
     def __init__(self, key, store, domainmap):
 
         key1 = bridgedb.Bridges.get_hmac(key, "Map-Addresses-To-Ring")
-        self.emailHmac = bridgedb.Bridges.get_hmac_fn(key1, hex=1)
+        self.emailHmac = bridgedb.Bridges.get_hmac_fn(key1, hex=False)
 
         key2 = bridgedb.Bridges.get_hmac(key, "Order-Bridges-In-Ring")
-        self.ring = bridgedb.Bridges.BrigeRing(key2)
+        self.ring = bridgedb.Bridges.BridgeRing(key2)
         self.store = store
         self.domainmap = domainmap
 
@@ -136,12 +136,12 @@
         self.ring.insert(bridge)
 
     def getBridgesForEmail(self, emailaddress, epoch, N=1):
-        emailaddress = normalizeEmail(emailaddress)
-        if emailAddress is None:
+        emailaddress = normalizeEmail(emailaddress, self.domainmap)
+        if emailaddress is None:
             return [] #XXXX raise an exception.
-        if store.has_key(emailaddress):
+        if self.store.has_key(emailaddress):
             result = []
-            ids = store[emailaddress]
+            ids = self.store[emailaddress]
             for id in bridgedb.Bridges.chopString(ids, bridgedb.Bridges.ID_LEN):
                 b = self.ring.getBridgeByID(id)
                 if b != None:
@@ -149,7 +149,7 @@
             return result
 
         pos = self.emailHmac("<%s>%s" % (epoch, emailaddress))
-        result = ring.getBridges(pos, N)
+        result = self.ring.getBridges(pos, N)
         memo = "".join(b.getID() for b in result)
         self.store[emailaddress] = memo
         return result

Modified: bridgedb/trunk/lib/bridgedb/Main.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Main.py	2007-12-14 19:11:32 UTC (rev 12811)
+++ bridgedb/trunk/lib/bridgedb/Main.py	2007-12-14 20:32:50 UTC (rev 12812)
@@ -17,29 +17,32 @@
         self.__dict__.update(attrs)
 
 CONFIG = Conf(
+    RUN_IN_DIR = ".",
+
     BRIDGE_FILES = [ "./cached-descriptors", "./cached-descriptors.new" ],
     BRIDGE_PURPOSE = "bridge",
-    DB_FILE = [ "./bridgedist" ],
-    DB_LOG_FILE = [ "./bridgedist.log" ],
+    DB_FILE = "./bridgedist.db",
+    DB_LOG_FILE = "./bridgedist.log",
 
-    N_IP_CLUSTERS = 8,
-    MASTER_KEY_FILE = [ "./secret_key" ],
+    N_IP_CLUSTERS = 4,
+    MASTER_KEY_FILE = "./secret_key",
 
     HTTPS_DIST = True,
     HTTPS_SHARE=10,
     HTTPS_BIND_IP=None,
     HTTPS_PORT=6789,
     HTTPS_CERT_FILE="cert",
-    HTTPS_KEY_FILE="key",
+    HTTPS_KEY_FILE="privkey.pem",
     HTTP_UNENCRYPTED_BIND_IP=None,
     HTTP_UNENCRYPTED_PORT=6788,
     HTTPS_N_BRIDGES_PER_ANSWER=2,
 
     EMAIL_DIST = True,
     EMAIL_SHARE=10,
-    EMAIL_DOMAINS = [ "gmail.com", "yahoo.com" ],
+    EMAIL_DOMAINS = [ "gmail.com", "yahoo.com", "catbus.wangafu.net" ],
     EMAIL_DOMAIN_MAP = { "mail.google.com" : "gmail.com",
                          "googlemail.com" : "gmail.com", },
+    EMAIL_RESTRICT_IPS=[],
     EMAIL_BIND_IP=None,
     EMAIL_PORT=6725,
     EMAIL_N_BRIDGES_PER_ANSWER=2,
@@ -64,7 +67,7 @@
     True
     """
     try:
-        f = open(fname, 'r')
+        f = open(fname, 'rb')
     except IOError:
         k = os.urandom(32)
         flags = os.O_WRONLY|os.O_TRUNC|os.O_CREAT|getattr(os, "O_BIN", 0)
@@ -85,28 +88,40 @@
         f.close()
 
 def startup(cfg):
-    key = getKey(MASTER_KEY_FILE)
+    cfg.BRIDGE_FILES = [ os.path.expanduser(fn) for fn in cfg.BRIDGE_FILES ]
+    for key in ("RUN_IN_DIR", "DB_FILE", "DB_LOG_FILE", "MASTER_KEY_FILE",
+                "HTTPS_CERT_FILE", "HTTPS_KEY_FILE"):
+        v = getattr(cfg, key)
+        if v:
+            setattr(cfg, key, os.path.expanduser(v))
+
+    if cfg.RUN_IN_DIR:
+        os.chdir(cfg.RUN_IN_DIR)
+
+    key = getKey(cfg.MASTER_KEY_FILE)
     dblogfile = None
+    emailDistributor = ipDistributor = None
 
     baseStore = store = anydbm.open(cfg.DB_FILE, "c", 0600)
-    if DB_LOG_FILE:
+    if cfg.DB_LOG_FILE:
         dblogfile = open(cfg.DB_LOG_FILE, "a+", 0)
-        store = LogDB("db", store, dblogfile)
+        store = Bridges.LogDB("db", store, dblogfile)
 
     splitter = Bridges.BridgeSplitter(Bridges.get_hmac(key, "Splitter-Key"),
                                       Bridges.PrefixStore(store, "sp|"))
 
     if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
-        ipDistrbutor = Dist.ipBasedDistributor(Dist.uniformMap,
-                                 Dist.N_IP_CLUSTERS,
-                                 Bridges.get_hmac(key, "HTTPS-IP-Dist-Key"))
+        ipDistributor = Dist.IPBasedDistributor(
+            Dist.uniformMap,
+            cfg.N_IP_CLUSTERS,
+            Bridges.get_hmac(key, "HTTPS-IP-Dist-Key"))
         splitter.addRing(ipDistributor, "https", cfg.HTTPS_SHARE)
         webSchedule = Time.IntervalSchedule("day", 2)
 
     if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
         for d in cfg.EMAIL_DOMAINS:
             cfg.EMAIL_DOMAIN_MAP[d] = d
-        emailDistributor = Dist.emailBasedDistributor(
+        emailDistributor = Dist.EmailBasedDistributor(
             Bridges.get_hmac(key, "Email-Dist-Key"),
             Bridges.PrefixStore(store, "em|"),
             cfg.EMAIL_DOMAIN_MAP.copy())
@@ -118,11 +133,18 @@
                          "unallocated",
                          cfg.RESERVED_SHARE)
 
-    stats = Bridges.BridgeTracker(Bridges.PrefixStore(store, "fs"),
-                                  Bridges.PrefixStore(store, "ls"))
+    stats = Bridges.BridgeTracker(Bridges.PrefixStore(store, "fs|"),
+                                  Bridges.PrefixStore(store, "ls|"))
     splitter.addTracker(stats)
 
+    print "Loading bridges"
     load(cfg, splitter)
+    print "%d bridges loaded" % len(splitter)
+    if emailDistributor:
+        print "%d for email" % len(emailDistributor.ring)
+    if ipDistributor:
+        print "%d for web:" % len(ipDistributor.splitter)
+        print "  by location set:", " ".join(str(len(r)) for r in ipDistributor.rings)
 
     if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
         Server.addWebServer(cfg, ipDistributor, webSchedule)
@@ -131,9 +153,21 @@
         Server.addSMTPServer(cfg, emailDistributor, emailSchedule)
 
     try:
-        Server.run()
+        print "Starting reactors."
+        Server.runServers()
     finally:
         baseStore.close()
         if dblogfile is not None:
             dblogfile.close()
 
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print "Syntax: %s [config file]" % sys.argv[0]
+        sys.exit(1)
+    if sys.argv[1] == "TESTING":
+        configuration = CONFIG
+    else:
+        configuration = {}
+        execfile(sys.argv[1], configuration)
+
+    startup(CONFIG)

Modified: bridgedb/trunk/lib/bridgedb/Server.py
===================================================================
--- bridgedb/trunk/lib/bridgedb/Server.py	2007-12-14 19:11:32 UTC (rev 12811)
+++ bridgedb/trunk/lib/bridgedb/Server.py	2007-12-14 20:32:50 UTC (rev 12812)
@@ -15,6 +15,8 @@
 import twisted.web.server
 import twisted.mail.smtp
 
+import bridgedb.Dist
+
 class WebResource(twisted.web.resource.Resource):
     isLeaf = True
 
@@ -36,7 +38,7 @@
         return "<html><body><pre>%s</pre></body></html>" % answer
 
 def addWebServer(cfg, dist, sched):
-    from twised.web.server import Site
+    from twisted.web.server import Site
     resource = WebResource(dist, sched, cfg.HTTPS_N_BRIDGES_PER_ANSWER)
     site = Site(resource)
     if cfg.HTTP_UNENCRYPTED_PORT:
@@ -53,6 +55,7 @@
 
 class MailFile:
     def __init__(self, lines):
+        self.lines = lines
         self.idx = 0
     def readline(self):
         try :
@@ -64,59 +67,64 @@
 
 def getMailResponse(lines, ctx):
     # Extract data from the headers.
-    msg = rfc822(MailFile(lines))
+    msg = rfc822.Message(MailFile(lines))
     subject = msg.getheader("Subject", None)
     if not subject: subject = "[no subject]"
     clientFromAddr = msg.getaddr("From")
     clientSenderAddr = msg.getaddr("Sender")
     msgID = msg.getheader("Message-ID")
-    if clientSenderAddr:
+    if clientSenderAddr and clientSenderAddr[1]:
         clientAddr = clientSenderAddr[1]
-    elif clientFromAddr:
+    elif clientFromAddr and clientFromAddr[1]:
         clientAddr = clientFromAddr[1]
     else:
-        return None
+        print "No from header. WTF."
+        return None,None
     for ln in lines:
         if ln.strip() in ("get bridges", "Subject: get bridges"):
             break
     else:
-        return None
+        print "No request for bridges."
+        return None,None
 
     try:
         interval = ctx.schedule.getInterval(time.time())
         bridges = ctx.distributor.getBridgesForEmail(clientAddr,
                                                      interval, ctx.N)
-    except bridgedb.Dist.BadEmail:
-        return None
+    except bridgedb.Dist.BadEmail, e:
+        print "Bad email addr in request: %s"%e
+        return None, None
     if not bridges:
-        return None
+        print "No bridges available."
+        return None, None
 
     # Generate the message.
     f = StringIO()
     w = MimeWriter.MimeWriter(f)
-    w.addHeader("From", ctx.fromAddr)
-    w.addHeader("To", clientAddr)
-    w.addHeader("Message-ID", twisted.mail.smtp.messageid())
+    w.addheader("From", ctx.fromAddr)
+    w.addheader("To", clientAddr)
+    w.addheader("Message-ID", twisted.mail.smtp.messageid())
     if not subject.startswith("Re:"): subject = "Re: %s"%subject
-    w.addHeader("Subject", subject)
-    w.addHeader("In-Reply-To", msgID)
-    w.addHeader("Date", twisted.mail.smtp.rfc822date())
+    w.addheader("Subject", subject)
+    w.addheader("In-Reply-To", msgID)
+    w.addheader("Date", twisted.mail.smtp.rfc822date())
     body = w.startbody("text/plain")
     for b in bridges:
         body.write("%s\n" % b.getConfigLine())
 
     f.seek(0)
-    return f
+    return clientAddr, f
 
 def replyToMail(lines, ctx):
-    sendToUser, response = getMailResponse(lines)
+    sendToUser, response = getMailResponse(lines, ctx)
     if response is None:
         return
+    response.seek(0)
     d = Deferred()
     factory = twisted.mail.smtp.SMTPSenderFactory(
         ctx.fromAddr,
         sendToUser,
-        StringIO(response),
+        response,
         d)
     reactor.connectTCP(ctx.smtpServer, ctx.smtpPort, factory)
     return d
@@ -143,7 +151,7 @@
 
     def lineReceived(self, line):
         self.nBytes += len(line)
-        if self.nBytes > ctx.maximumSize:
+        if self.nBytes > self.ctx.maximumSize:
             self.ignoring = True
         else:
             self.lines.append(line)
@@ -151,7 +159,7 @@
     def eomReceived(self):
         if not self.ignoring:
             replyToMail(self.lines, self.ctx)
-        return defer.succeed(None)
+        return twisted.internet.defer.succeed(None)
 
     def connectionLost(self):
         pass
@@ -169,7 +177,7 @@
         if user.dest.local != self.ctx.username:
             raise twisted.mail.smtp.SMTPBadRcpt(user)
         return lambda: MailMessage(self.ctx)
-
+    
 class MailFactory(twisted.mail.smtp.SMTPFactory):
     def __init__(self, *a, **kw):
         twisted.mail.smtp.SMTPFactory.__init__(self, *a, **kw)
@@ -185,7 +193,7 @@
         return p
 
 def addSMTPServer(cfg, dist, sched):
-    ctx = MailContext(cfg)
+    ctx = MailContext(cfg, dist, sched)
     factory = MailFactory()
     factory.setBridgeDBContext(ctx)
     ip = cfg.EMAIL_BIND_IP or ""