[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] [bridgedb/master] Drop bridge-email persistence. Instead, rate-limit email replies.



Author: Nick Mathewson <nickm@xxxxxxxxxxxxxx>
Date: Mon, 12 Oct 2009 16:26:31 -0400
Subject: Drop bridge-email persistence.  Instead, rate-limit email replies.
Commit: 0817b95a9f36c124c6c3cf262a5e0be3a43efa22

Also, we drop the requirement that email messages must contain the
magic "get bridges" line.
---
 TODO                    |   11 ++++++-----
 lib/bridgedb/Dist.py    |   37 ++++++++++++++++++++++++-------------
 lib/bridgedb/Server.py  |   19 ++++++++++++-------
 lib/bridgedb/Storage.py |   32 +++++++++++++++++---------------
 lib/bridgedb/Tests.py   |   34 ++++++++++++++++++++++------------
 5 files changed, 81 insertions(+), 52 deletions(-)

diff --git a/TODO b/TODO
index 0b73856..6baa149 100644
--- a/TODO
+++ b/TODO
@@ -4,9 +4,10 @@ RSN:
 X Drop email->bridge mappings after N days
 o Bump up epoch length a lot.
 . Test DB migration code.
-- Drop email persistence
-- Rate-limit email replies.
-- Stop looking for "get bridges" in emails.
+o Drop email persistence.
+o Rate-limit email replies.
+o Stop looking for "get bridges" in emails.
+o Clean email backend periodically 
 - Write a spec.
 
 
@@ -24,8 +25,8 @@ X Make the 'magic word' for the email configurable, case-tolerant,
 - make all the rest of the email options configurable.
 - bug: the email handler gets really upset when the email doesn't have
   a message-id header in it.
-- When we hit the end of a period, forget email address history.
-- When sending bridges to an email address in the history, check for
+o When we hit the end of a period, forget email address history.
+X When sending bridges to an email address in the history, check for
   liveness.
 . Make bounces go to the right address.
   o Make address sent in "mail from" command configurable.
diff --git a/lib/bridgedb/Dist.py b/lib/bridgedb/Dist.py
index 701e82b..6410a28 100644
--- a/lib/bridgedb/Dist.py
+++ b/lib/bridgedb/Dist.py
@@ -121,6 +121,8 @@ ADDRSPEC = r'(%s)\@(%s)'%(DOTATOM, DOMAIN)
 SPACE_PAT = re.compile(r'\s+')
 ADDRSPEC_PAT = re.compile(ADDRSPEC)
 
+MAX_EMAIL_RATE = 3*3600
+
 class BadEmail(Exception):
     """Exception raised when we get a bad email address."""
     def __init__(self, msg, email):
@@ -132,6 +134,10 @@ class UnsupportedDomain(BadEmail):
        don't know."""
     pass
 
+class TooSoonEmail(BadEmail):
+    """Raised when we got a request from this address too recently."""
+    pass
+
 def extractAddrSpec(addr):
     """Given an email From line, try to extract and parse the addrspec
        portion.  Returns localpart,domain on success; raises BadEmail
@@ -233,6 +239,7 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
                be any string, so long as it changes with every period.
            N -- the number of bridges to try to give back.
         """
+        now = time.time()
         try:
           emailaddress = normalizeEmail(emailaddress, self.domainmap,
                                       self.domainrules)
@@ -243,25 +250,29 @@ class EmailBasedDistributor(bridgedb.Bridges.BridgeHolder):
 
         db = bridgedb.Storage.getDB()
 
-        ids = db.getEmailedBridges(emailaddress)
-
-        if ids:
-            logging.info("We've seen %r before. Sending the same bridges"
-                         " as last time", emailaddress)
-            result = []
-            for fp in ids:
-                b = self.ring.getBridgeByID(bridgedb.Bridges.fromHex(fp))
-                if b != None:
-                    result.append(b)
-            return result
+        lastSaw = db.getEmailTime(emailadress)
+        if lastSaw + MAX_EMAIL_RATE >= now:
+            log.warning("Got a request for bridges from %r; we already "
+                        "answered one within the last %d seconds. Ignoring.",
+                        emailaddress, MAX_EMAIL_RATE)
+            raise TooSoonEmail("Too many emails; wait till later", emailaddress)
 
         pos = self.emailHmac("<%s>%s" % (epoch, emailaddress))
         result = self.ring.getBridges(pos, N)
 
-        db.addEmailedBridges(emailaddress, time.time(),
-                             [b.fingerprint for b in result])
+        db.setEmailTime(emailaddress, now)
         db.commit()
         return result
 
     def __len__(self):
         return len(self.ring)
+
+    def cleanDatabase(self):
+        db = bridgedb.Storage.getDB()
+        try:
+            db.cleanEmailedBridges(time.time()-MAX_EMAIL_RATE)
+        except:
+            db.rollback()
+            raise
+        else:
+            db.commit()
diff --git a/lib/bridgedb/Server.py b/lib/bridgedb/Server.py
index 4af849b..d45907b 100644
--- a/lib/bridgedb/Server.py
+++ b/lib/bridgedb/Server.py
@@ -16,6 +16,7 @@ from zope.interface import implements
 
 from twisted.internet import reactor
 from twisted.internet.defer import Deferred
+from twisted.internet.task import LoopingCall
 import twisted.web.resource
 import twisted.web.server
 import twisted.mail.smtp
@@ -220,13 +221,13 @@ def getMailResponse(lines, ctx):
             return None, None
 
     # Was the magic string included
-    for ln in lines:
-        if ln.strip().lower() in ("get bridges", "subject: get bridges"):
-            break
-    else:
-        logging.info("Got a mail from %r with no bridge request; dropping",
-                     clientAddr)
-        return None,None
+    #for ln in lines:
+    #    if ln.strip().lower() in ("get bridges", "subject: get bridges"):
+    #        break
+    #else:
+    #    logging.info("Got a mail from %r with no bridge request; dropping",
+    #                 clientAddr)
+    #    return None,None
 
     # Figure out which bridges to send
     try:
@@ -385,8 +386,12 @@ def addSMTPServer(cfg, dist, sched):
     factory.setBridgeDBContext(ctx)
     ip = cfg.EMAIL_BIND_IP or ""
     reactor.listenTCP(cfg.EMAIL_PORT, factory, interface=ip)
+    # Set up a LoopingCall to run every 30 minutes and forget old email times.
+    lc = LoopingCall(dist.cleanDatabase)
+    lc.start(1800, now=False)
     return factory
 
 def runServers():
     """Start all the servers that we've configured. Exits when they do."""
     reactor.run()
+
diff --git a/lib/bridgedb/Storage.py b/lib/bridgedb/Storage.py
index d3e0e22..703595a 100644
--- a/lib/bridgedb/Storage.py
+++ b/lib/bridgedb/Storage.py
@@ -2,6 +2,7 @@
 # Copyright (c) 2007-2009, The Tor Project, Inc.
 # See LICENSE for licensing information
 
+import calendar
 import os
 import logging
 import bridgedb.Bridges
@@ -18,6 +19,8 @@ def _escapeValue(v):
 
 def timeToStr(t):
     return time.strftime("%Y-%m-%d %H:%M", time.gmtime(t))
+def strToTime(t):
+    return calendar.timegm(time.strptime(t, "%Y-%m-%d %H:%M"))
 
 class SqliteDict:
     """
@@ -126,12 +129,11 @@ SCHEMA1_SCRIPT = """
  CREATE UNIQUE INDEX BridgesKeyIndex ON Bridges ( hex_key );
 
  CREATE TABLE EmailedBridges (
-     email NOT NULL,
-     when_mailed,
-     id INTEGER REFERENCES Bridges(id)
+     email PRIMARY KEY NOT NULL,
+     when_mailed
  );
 
- CREATE INDEX EmailedBridgesEmailIndex ON EmailedBridges ( email );
+ CREATE INDEX EmailedBridgesWhenMailed on EmailedBridges ( email );
 """
 
 
@@ -182,22 +184,22 @@ class Database:
         cur = self._cur
         t = timeToStr(expireBefore)
 
-        cur.execute("DELETE FROM Bridges WHERE when_mailed < ?", t);
+        cur.execute("DELETE FROM EmailedBridges WHERE when_mailed < ?", (t,));
 
-    def getEmailedBridges(self, addr):
+    def getEmailTime(self, addr):
         cur = self._cur
-        cur.execute("SELECT hex_key FROM EmailedBridges, Bridges WHERE "
-                    "email = ? AND Bridges.id = EmailedBridges.id", (addr,))
-        return [ hk for hk, in cur.fetchall() ]
+        cur.execute("SELECT when_mailed FROM EmailedBridges WHERE "
+                    "email = ?", (addr,))
+        v = cur.fetchone()
+        if v is None:
+            return None
+        return strToTime(v[0])
 
-    def addEmailedBridges(self, addr, whenMailed, bridgeKeys):
+    def setEmailTime(self, addr, whenMailed):
         cur = self._cur
         t = timeToStr(whenMailed)
-        for k in bridgeKeys:
-            assert(len(k))==HEX_ID_LEN
-        cur.executemany("INSERT INTO EmailedBridges (email,when_mailed,id) "
-                        "SELECT ?,?,id FROM Bridges WHERE hex_key = ?",
-                        [(addr,t,k) for k in bridgeKeys])
+        cur.execute("INSERT OR REPLACE INTO EmailedBridges "
+                    "(email,when_mailed) VALUES (?,?)", (addr, t))
 
 def openDatabase(sqlite_file):
     conn = sqlite3.Connection(sqlite_file)
diff --git a/lib/bridgedb/Tests.py b/lib/bridgedb/Tests.py
index 826b3c4..f5198c4 100644
--- a/lib/bridgedb/Tests.py
+++ b/lib/bridgedb/Tests.py
@@ -140,6 +140,9 @@ class SQLStorageTests(unittest.TestCase):
         os.close(self.fd)
         os.unlink(self.fname)
 
+    def assertCloseTo(self, a, b, delta=60):
+        self.assertTrue(abs(a-b) <= delta)
+
     def testBridgeStorage(self):
         db = self.db
         B = bridgedb.Bridges.Bridge
@@ -184,16 +187,25 @@ class SQLStorageTests(unittest.TestCase):
         v = cur.fetchone()
         self.assertEquals(v, (3,))
 
-        r = db.getEmailedBridges("abc@xxxxxxxxxxx")
-        self.assertEquals(r, [])
-        db.addEmailedBridges("abc@xxxxxxxxxxx", t, [k1,k2])
-        db.addEmailedBridges("def@xxxxxxxxxxx", t+1000, [k2,k3])
-        r = db.getEmailedBridges("abc@xxxxxxxxxxx")
-        self.assertEquals(sorted(r), sorted([k1,k2]))
-        r = db.getEmailedBridges("def@xxxxxxxxxxx")
-        self.assertEquals(sorted(r), sorted([k2,k3]))
-        r = db.getEmailedBridges("ghi@xxxxxxxxxxx")
-        self.assertEquals(r, [])
+        r = db.getEmailTime("abc@xxxxxxxxxxx")
+        self.assertEquals(r, None)
+        db.setEmailTime("abc@xxxxxxxxxxx", t)
+        db.setEmailTime("def@xxxxxxxxxxx", t+1000)
+        r = db.getEmailTime("abc@xxxxxxxxxxx")
+        self.assertCloseTo(r, t)
+        r = db.getEmailTime("def@xxxxxxxxxxx")
+        self.assertCloseTo(r, t+1000)
+        r = db.getEmailTime("ghi@xxxxxxxxxxx")
+        self.assertEquals(r, None)
+
+        db.cleanEmailedBridges(t+200)
+        db.setEmailTime("def@xxxxxxxxxxx", t+5000)
+        r = db.getEmailTime("abc@xxxxxxxxxxx")
+        self.assertEquals(r, None)
+        r = db.getEmailTime("def@xxxxxxxxxxx")
+        self.assertCloseTo(r, t+5000)
+        cur.execute("SELECT * FROM EmailedBridges")
+        self.assertEquals(len(cur.fetchall()), 1)
 
 def testSuite():
     suite = unittest.TestSuite()
@@ -215,5 +227,3 @@ def main():
 
     unittest.TextTestRunner(verbosity=1).run(testSuite())
 
-
-
-- 
1.5.6.5