[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [gettor/master] Enhance the blacklisting mechanism:
commit 6a240005e764defa6269392ded78857e13426ba4
Author: Christian Fromme <kaner@xxxxxxxxxx>
Date: Tue Aug 30 22:41:59 2011 +0200
Enhance the blacklisting mechanism:
- Users can request a configurable number of packages until they're
blacklisted, instead of only one
- After copying the email normalization code from BridgeDB (thanks,
rransom), abusing GetTor just got a bit harder
Closes #3381
---
gettor.conf | 3 +
lib/gettor/blacklist.py | 42 ++++++++++++++++----
lib/gettor/config.py | 3 +
lib/gettor/requests.py | 4 +-
lib/gettor/responses.py | 27 +++++++------
lib/gettor/utils.py | 99 ++++++++++++++++++++++++++++++++++++++++-------
6 files changed, 142 insertions(+), 36 deletions(-)
diff --git a/gettor.conf b/gettor.conf
index 93b8d68..29a2494 100644
--- a/gettor.conf
+++ b/gettor.conf
@@ -27,6 +27,9 @@ DUMPFILE = "gettor.dump"
# for it?
BLACKLIST_BY_TYPE = True
+# How many packages per type do we send to a user before we blacklist him?
+BLACKLIST_THRES = 3
+
# Which mirror to sync packages from
RSYNC_MIRROR = "rsync.torproject.org"
diff --git a/lib/gettor/blacklist.py b/lib/gettor/blacklist.py
index 83cf188..1ddcfd2 100644
--- a/lib/gettor/blacklist.py
+++ b/lib/gettor/blacklist.py
@@ -5,15 +5,17 @@
import os
import re
import glob
+import struct
import logging
import gettor.utils
class BWList:
- def __init__(self, blacklistDir):
+ def __init__(self, blacklistDir, blacklistThres):
"""A blacklist lives as hash files inside a directory and is simply a
number of files that represent hashed email addresses.
"""
self.blacklistDir = blacklistDir
+ self.blacklistThres = blacklistThres
# "general" is the main blacklist
self.createSublist("general")
@@ -29,17 +31,39 @@ class BWList:
# XXX Change this to something more appropriate
raise IOError("Bad dir: %s" % fullDir)
- def lookupListEntry(self, address, blacklistName="*"):
+ def entryExists(self, address, blacklistName="general"):
+ """Look up if a certain address is already blacklisted
+ """
+ hashString = self.getHash(address)
+ globPath = os.path.join(self.blacklistDir, blacklistName)
+ hashVec = glob.glob(os.path.join(globPath, hashString))
+ if len(hashVec) > 0:
+ if os.path.isfile(hashVec[0]):
+ return True
+
+ return False
+
+ def checkAndUpdate(self, address, blacklistName="*", update=False):
"""Check to see if we have a list entry for the given address.
"""
- if address is None:
- logging.error("Argument 'address' is None")
- return False
hashString = self.getHash(address)
globPath = os.path.join(self.blacklistDir, blacklistName)
hashVec = glob.glob(os.path.join(globPath, hashString))
if len(hashVec) > 0:
- return True
+ count = ""
+ with open(hashVec[0], 'r') as fd:
+ count = fd.read()
+
+ i_count = int(count)
+ i_count += 1
+ count = str(i_count)
+
+ if update == True:
+ with open(hashVec[0], 'w+') as fd:
+ fd.write("%s\n" % count)
+
+ if i_count >= self.blacklistThres:
+ return True
return False
def createListEntry(self, address, blacklistName="general"):
@@ -48,12 +72,12 @@ class BWList:
if address is None:
logging.error("Bad args in createListEntry()")
return False
- if self.lookupListEntry(address, blacklistName) == False:
+ if self.entryExists(address, blacklistName) == False:
hashString = self.getHash(address)
entry = os.path.join(self.blacklistDir, blacklistName, hashString)
try:
- fd = open(entry, 'w')
- fd.close
+ with open(entry, 'w+') as fd:
+ fd.write("0\n")
return True
except:
logging.error("Creating list entry %s failed." % entry)
diff --git a/lib/gettor/config.py b/lib/gettor/config.py
index 16fb8b8..1f1eb88 100644
--- a/lib/gettor/config.py
+++ b/lib/gettor/config.py
@@ -18,6 +18,8 @@
DUMPFILE: Where failed mails get stored
BLACKLIST_BY_TYPE: Do we send every mail type to every user only once before
we blacklist them for it?
+ BLACKLIST_THRES: How many packages per type do we send to a user before we
+ blacklist him/her?
RSYNC_MIRROR: Which rsync server to sync packages from
DEFAULT_LOCALE: Default locale
SUPP_LANGS: Supported languages by GetTor
@@ -42,6 +44,7 @@ CONFIG_DEFAULTS = {
'PASSFILE': "gettor.pass",
'DUMPFILE': "./gettor.dump",
'BLACKLIST_BY_TYPE': True,
+ 'BLACKLIST_THRES': 3,
'RSYNC_MIRROR': "rsync.torproject.org",
'DEFAULT_LOCALE': "en",
'SUPP_LANGS': { 'en': ("english", ), },
diff --git a/lib/gettor/requests.py b/lib/gettor/requests.py
index b29e335..2ee275e 100644
--- a/lib/gettor/requests.py
+++ b/lib/gettor/requests.py
@@ -22,7 +22,9 @@ class requestMail:
self.config = config
self.request = {}
self.request['user'] = self.parsedMessage["Return-Path"]
- self.request['hashed_user'] = gettor.utils.getHash(self.request['user'])
+ # Normalize address before hashing
+ normalized_addr = gettor.utils.normalizeAddress(self.request['user'])
+ self.request['hashed_user'] = gettor.utils.getHash(normalized_addr)
self.request['ouraddr'] = self.getRealTo(self.parsedMessage["to"])
self.request['locale'] = self.getLocaleInTo(self.request['ouraddr'])
self.request['package'] = None
diff --git a/lib/gettor/responses.py b/lib/gettor/responses.py
index 0f3d2f8..e8dcc17 100644
--- a/lib/gettor/responses.py
+++ b/lib/gettor/responses.py
@@ -16,6 +16,7 @@ from email.mime.base import MIMEBase
from email.mime.text import MIMEText
import gettor.blacklist
+import gettor.utils
import gettor.i18n as i18n
def getGreetingText(t):
@@ -189,8 +190,8 @@ class Response:
# Init black & whitelists
wlStateDir = os.path.join(self.config.BASEDIR, "wl")
blStateDir = os.path.join(self.config.BASEDIR, "bl")
- self.wList = gettor.blacklist.BWList(wlStateDir)
- self.bList = gettor.blacklist.BWList(blStateDir)
+ self.wList = gettor.blacklist.BWList(wlStateDir, config.BLACKLIST_THRES)
+ self.bList = gettor.blacklist.BWList(blStateDir, config.BLACKLIST_THRES)
def sendReply(self):
"""All routing decisions take place here. Sending of mails takes place
@@ -222,21 +223,22 @@ class Response:
type name we're looking for
"""
# First of all, check if user is whitelisted: Whitelist beats Blacklist
- if self.wList.lookupListEntry(self.reqInfo['user'], "general"):
+ normalized_addr = gettor.utils.normalizeAddress(self.reqInfo['user'])
+ if self.wList.entryExists(normalized_addr, "general"):
logging.info("Whitelisted user " + self.reqInfo['hashed_user'])
return False
# Now check general and specific blacklists, in that order
- if self.bList.lookupListEntry(self.reqInfo['user'], "general"):
+ if self.bList.entryExists(normalized_addr, "general"):
logging.info("Blacklisted user " + self.reqInfo['hashed_user'])
return True
# Create a unique dir name for the requested routine
self.bList.createSublist(fname)
- if self.bList.lookupListEntry(self.reqInfo['user'], fname):
+ if self.bList.checkAndUpdate(normalized_addr, fname, True):
logging.info("User %s is blacklisted for %s" \
% (self.reqInfo['hashed_user'], fname))
return True
else:
- self.bList.createListEntry(self.reqInfo['user'], fname)
+ self.bList.createListEntry(normalized_addr, fname)
return False
def sendPackage(self):
@@ -317,15 +319,16 @@ class Response:
splitDir = os.path.join(self.config.BASEDIR, "packages", splitpack)
fileList = os.listdir(splitDir)
- # Be a polite bot and send message that mail is on the way
- if self.config.DELAY_ALERT:
- if not self.sendDelayAlert():
- logging.error("Failed to sent delay alert.")
-
# Sort the files, so we can send 01 before 02 and so on..
fileList.sort()
nFiles = len(fileList)
num = 0
+
+ # Be a polite bot and send message that mail is on the way
+ if self.config.DELAY_ALERT:
+ if not self.sendDelayAlert(nFiles):
+ logging.error("Failed to sent delay alert.")
+
# For each available split file, send a mail
for filename in fileList:
path = os.path.join(splitDir, filename)
@@ -360,7 +363,7 @@ class Response:
packageInfo = self.reqInfo['package']
logging.info("Sending delay alert to %s" % self.reqInfo['hashed_user'])
- return self.sendTextEmail(getDelayAlertMsg(self.t), packageInfo)
+ return self.sendTextEmail(getDelayAlertMsg(self.t, packageInfo))
def sendHelp(self):
"""Send a help mail. This happens when a user sent us a request we
diff --git a/lib/gettor/utils.py b/lib/gettor/utils.py
index d7f0725..c06e40f 100644
--- a/lib/gettor/utils.py
+++ b/lib/gettor/utils.py
@@ -110,7 +110,7 @@ def addWhitelistEntry(conf, address):
wlStateDir = conf.BASEDIR + "/wl"
logging.debug("Adding address to whitelist: %s" % address)
try:
- whiteList = gettor.blacklist.BWList(wlStateDir)
+ whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
except IOError, e:
logging.error("Whitelist error: %s" % e)
return False
@@ -127,7 +127,7 @@ def addBlacklistEntry(conf, address):
logging.debug("Adding address to blacklist: %s" % address)
blStateDir = conf.BASEDIR + "/bl"
try:
- blackList = gettor.blacklist.BWList(blStateDir)
+ blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
except IOError, e:
logging.error("Blacklist error: %s" % e)
return False
@@ -146,15 +146,15 @@ def lookupAddress(conf, address):
wlStateDir = conf.BASEDIR + "/wl"
blStateDir = conf.BASEDIR + "/bl"
try:
- whiteList = gettor.blacklist.BWList(wlStateDir)
- blackList = gettor.blacklist.BWList(blStateDir)
+ whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
+ blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
except IOError, e:
logging.error("White/Blacklist error: %s" % e)
return False
- if whiteList.lookupListEntry(address, "general"):
+ if whiteList.checkAndUpdate(address, "general"):
logging.info("Address '%s' is present in the whitelist." % address)
found = True
- if blackList.lookupListEntry(address, "general"):
+ if blackList.checkAndUpdate(address, "general"):
logging.info("Address '%s' is present in the blacklist." % address)
found = True
if not found:
@@ -169,7 +169,7 @@ def clearWhitelist(conf):
"""
wlStateDir = conf.BASEDIR + "/wl"
try:
- whiteList = gettor.blacklist.BWList(wlStateDir)
+ whiteList = gettor.blacklist.BWList(wlStateDir, conf.BLACKLIST_THRES)
except IOError, e:
logging.error("Whitelist error: %s" % e)
return False
@@ -188,7 +188,7 @@ def clearBlacklist(conf, olderThanDays):
logging.debug("Clearing blacklist..")
blStateDir = conf.BASEDIR + "/bl"
try:
- blackList = gettor.blacklist.BWList(blStateDir)
+ blackList = gettor.blacklist.BWList(blStateDir, conf.BLACKLIST_THRES)
except IOError, e:
logging.error("Blacklist error: %s" % e)
return False
@@ -262,13 +262,11 @@ def getCurrentCrontab():
return savedTab
def normalizeAddress(address):
- """We need this because we internally store email addresses in this format
- in the black- and whitelists
+ """This does everything from checking if the address is ok to stripping
+ dots and "+" addresses so abusing GetTor gets harder.
"""
- if address.startswith("<"):
- return address
- else:
- return "<" + address + ">"
+ address = normalizeEmail(address)
+ return "<" + address + ">"
def stripEmail(address):
@@ -300,3 +298,76 @@ def removeFromListByRegex(l, string):
return l
+# The following code is more or less taken from BridgeDB
+
+class BadEmail(Exception):
+ """Exception raised when we get a bad email address."""
+ def __init__(self, msg, email):
+ Exception.__init__(self, msg)
+ self.email = email
+
+ASPECIAL = '-_+/=_~'
+
+ACHAR = r'[\w%s]' % "".join("\\%s"%c for c in ASPECIAL)
+DOTATOM = r'%s+(?:\.%s+)*'%(ACHAR,ACHAR)
+DOMAIN = r'\w+(?:\.\w+)*'
+ADDRSPEC = r'(%s)\@(%s)'%(DOTATOM, DOMAIN)
+
+SPACE_PAT = re.compile(r'\s+')
+ADDRSPEC_PAT = re.compile(ADDRSPEC)
+
+def extractAddrSpec(addr):
+ """Given an email From line, try to extract and parse the addrspec
+ portion. Returns localpart,domain on success; raises BadEmail
+ on failure.
+ """
+ orig_addr = addr
+ addr = SPACE_PAT.sub(' ', addr)
+ addr = addr.strip()
+ # Only works on usual-form addresses; raises BadEmail on weird
+ # address form. That's okay, since we'll only get those when
+ # people are trying to fool us.
+ if '<' in addr:
+ # Take the _last_ index of <, so that we don't need to bother
+ # with quoting tricks.
+ idx = addr.rindex('<')
+ addr = addr[idx:]
+ m = re.search(r'<([^>]*)>', addr)
+ if m is None:
+ raise BadEmail("Couldn't extract address spec", orig_addr)
+ addr = m.group(1)
+
+ # At this point, addr holds a putative addr-spec. We only allow the
+ # following form:
+ # addr-spec = local-part "@" domain
+ # local-part = dot-atom
+ # domain = dot-atom
+ #
+ # In particular, we are disallowing: obs-local-part, obs-domain,
+ # comment, obs-FWS,
+ #
+ # Other forms exist, but none of the incoming services we recognize
+ # support them.
+ addr = addr.replace(" ", "")
+ m = ADDRSPEC_PAT.match(addr)
+ if not m:
+ raise BadEmail("Bad address spec format", orig_addr)
+ localpart, domain = m.groups()
+ return localpart, domain
+
+def normalizeEmail(addr):
+ """Given the contents of a from line, lowercase it, strip any "+"
+ suffix and all dots from the local part, and return the normalized
+ email address; raises BadEmail if the address cannot be parsed.
+ """
+ addr = addr.lower()
+ localpart, domain = extractAddrSpec(addr)
+
+ # addr+foo@ is an alias for addr@
+ idx = localpart.find('+')
+ if idx >= 0:
+ localpart = localpart[:idx]
+ localpart = localpart.replace(".", "")
+
+ return "%s@%s"%(localpart, domain)
+
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits