[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[minion-cvs] Optimize ServerInfo parsing.



Update of /home/minion/cvsroot/src/minion/lib/mixminion
In directory moria.mit.edu:/tmp/cvs-serv4002/lib/mixminion

Modified Files:
	Config.py ServerInfo.py benchmark.py 
Log Message:
Optimize ServerInfo parsing.

With directories, parsing and processing server descriptors becomes time-
critical.  This patch optimises unpickling, parsing, and validating
server descriptors, improving their performane by a factor of about X2 in
the non-signature-checking case.

The improvement comes as follows:
      * Don't convert the modulus to a python long when we only want
        to check the exponent. (38%)
      * Remove unnecessary stripspace from parsting base64-encoded data. (21%)
      * Optimize reading config files, inlining the the 'readconfigline'
        method, and splitting out the restricted and unrestricted cases. (27%)
      * Inline the makegmtime function in _parseData. (6%)
      * Optimize loops in Config.__reload. (7%) 
        
I've also added some code to make Config and ServerInfo more forgiving
about messed up whitespace and newlines (in response to versions of 
Mixmaster that barfed when faced with directores that had non-platform
newlines).  This slows down ServerInfo validation by 3 to 5% in the
non-signature-checking case.



Index: Config.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/Config.py,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -d -r1.27 -r1.28
--- Config.py	16 Dec 2002 02:42:31 -0000	1.27
+++ Config.py	29 Dec 2002 20:46:54 -0000	1.28
@@ -18,6 +18,9 @@
 
    All identifiers are case-sensitive.
 
+   Because of cross-platform stupidity, we recognize any sequence of [\r\n]
+   as a newline, and who's to tell us we can't?
+
    Example:
 
    [Section1]
@@ -48,6 +51,7 @@
 
 __all__ = [ 'ConfigError', 'ClientConfig' ]
 
+import calendar
 import binascii
 import os
 import re
@@ -222,9 +226,9 @@
 def _parseBase64(s,_hexmode=0):
     """Validation function.  Converts a base-64 encoded config value into
        its original. Raises ConfigError on failure."""
-    s = stripSpace(s)
     try:
         if _hexmode:
+            s = stripSpace(s)
             return binascii.a2b_hex(s)
         else:
             return binascii.a2b_base64(s)
@@ -238,7 +242,7 @@
 
 def _parsePublicKey(s):
     """Validate function.  Converts a Base-64 encoding of an ASN.1
-       represented RSA public key with modulus 65535 into an RSA
+       represented RSA public key with modulus 65537 into an RSA
        object."""
     asn1 = _parseBase64(s)
     if len(asn1) > 550:
@@ -247,91 +251,52 @@
         key = mixminion.Crypto.pk_decode_public_key(asn1)
     except mixminion.Crypto.CryptoError:
         raise ConfigError("Invalid public key")
-    if key.get_public_key()[1] != 65537:
+    if key.get_exponent() != 65537:
         raise ConfigError("Invalid exponent on public key")
     return key
 
 # Regular expression to match YYYY/MM/DD
 _date_re = re.compile(r"(\d\d\d\d)/(\d\d)/(\d\d)")
-# Regular expression to match YYYY/MM/DD HH:MM:SS
-_time_re = re.compile(r"(\d\d\d\d)/(\d\d)/(\d\d) (\d\d):(\d\d):(\d\d)")
-def _parseDate(s,_timeMode=0):
+def _parseDate(s):
     """Validation function.  Converts from YYYY/MM/DD format to a (long)
        time value for midnight on that date."""
-    # If _timeMode is true, convert from YYYY/MM/DD HH:MM:SS instead.
-    s = s.strip()
-    r = (_date_re, _time_re)[_timeMode]
-    m = r.match(s)
-    if not m:
-        raise ConfigError("Invalid %s %r" % (("date", "time")[_timeMode],s))
-    if _timeMode:
-        yyyy, MM, dd, hh, mm, ss = map(int, m.groups())
-    else:
+    m = _date_re.match(s.strip())
+    try:
         yyyy, MM, dd = map(int, m.groups())
-        hh, mm, ss = 0, 0, 0
-
+    except (ValueError,AttributeError):
+        raise ConfigError("Invalid date %r"%s)
     if not ((1 <= dd <= 31) and (1 <= MM <= 12) and
-            (1970 <= yyyy)  and (0 <= hh < 24) and
-            (0 <= mm < 60)  and (0 <= ss <= 61)):
-        raise ConfigError("Invalid %s %r" % (("date","time")[_timeMode],s))
-
-    return mixminion.Common.mkgmtime(yyyy, MM, dd, hh, mm, ss)
+            (1970 <= yyyy)):
+        raise ConfigError("Invalid date %s"%s)
+    return calendar.timegm((yyyy,MM,dd,0,0,0,0,0,0))
 
+# Regular expression to match YYYY/MM/DD HH:MM:SS
+_time_re = re.compile(r"(\d\d\d\d)/(\d\d)/(\d\d) (\d\d):(\d\d):(\d\d)")
 def _parseTime(s):
     """Validation function.  Converts from YYYY/MM/DD HH:MM:SS format
        to a (float) time value for GMT."""
-    return _parseDate(s,1)
+    m = _time_re.match(s.strip())
+    if not m:
+        raise ConfigError("Invalid time %r" % s)
+    
+    yyyy, MM, dd, hh, mm, ss = map(int, m.groups())
+    if not ((1 <= dd <= 31) and (1 <= MM <= 12) and
+            (1970 <= yyyy)  and (0 <= hh < 24) and
+            (0 <= mm < 60)  and (0 <= ss <= 61)):
+        raise ConfigError("Invalid time %r" % s)
+
+    return calendar.timegm((yyyy,MM,dd,hh,mm,ss,0,0,0))
 
 #----------------------------------------------------------------------
 
 # Regular expression to match a section header.
-_section_re = re.compile(r'\[([^\]]+)\]')
+_section_re = re.compile(r'\[\s*([^\s\]]+)\s*\]')
 # Regular expression to match the first line of an entry
 _entry_re = re.compile(r'([^:= \t]+)(?:\s*[:=]|[ \t])\s*(.*)')
-# Regular expression to match an entry from a restricted file.
-_restricted_entry_re = re.compile(r'([^:= \t]+): (.*)')
-def _readConfigLine(line, restrict=0):
-    """Helper function.  Given a line of a configuration file, return
-       a (TYPE, VALUE) pair, where TYPE is one of the following:
-
-         None: The line is empty or a comment.
-         'ERR': The line is incorrectly formatted. VALUE is an error message.
-         'SEC': The line is a section header. VALUE is the section's name.
-         'ENT': The line is the first line of an entry. VALUE is a (K,V) pair.
-         'MORE': The line is a continuation line of an entry. VALUE is the
-                 contents of the line.
-
-       If 'restrict'  is true, only accept 'ENT' lines of the format "K: V"
-    """
-
-    if line == '':
-        return None, None
-
-    space = line[0] and line[0] in ' \t'
-    line = line.strip()
-    # If we have an all-space line, or comment, we have no data.
-    if line == '' or line[0] == '#':
-        return None, None
-    # If the line starts with space, it's a continuation.
-    elif space:
-        return "MORE", line
-    # If the line starts with '[', we've probably got a section heading.
-    elif line[0] == '[' and not space:
-        m = _section_re.match(line)
-        if not m:
-            return "ERR", "Bad section declaration"
-        return 'SEC', m.group(1).strip()
-    else:
-        # Now try to parse the line.
-        if restrict:
-            m = _restricted_entry_re.match(line)
-        else:
-            m = _entry_re.match(line)
-        if not m:
-            return "ERR", "Bad entry"
-        return "ENT", (m.group(1), m.group(2))
+# Regular expression to match bogus line endings.
+_abnormal_line_ending_re = re.compile(r'\r\n?')
 
-def _readConfigFile(contents, restrict=0):
+def _readConfigFile(contents):
     """Helper function. Given the string contents of a configuration
        file, returns a list of (SECTION-NAME, SECTION) tuples, where
        each SECTION is a list of (KEY, VALUE, LINENO) tuples.
@@ -356,29 +321,72 @@
 
     for line in fileLines:
         lineno += 1
-        type, val = _readConfigLine(line, restrict)
-        if type == 'ERR':
-            raise ConfigError("%s at line %s" % (val, lineno))
-        elif type == 'SEC':
-            curSection = [ ]
-            sections.append( (val, curSection) )
-        elif type == 'ENT':
-            key,val = val
-            if curSection is None:
-                raise ConfigError("Unknown section at line %s" %lineno)
-            curSection.append( (key, val, lineno) )
-        elif type == 'MORE':
-            if restrict:
-                raise ConfigError("Continuation not allowed at line %s"%lineno)
-            if not curSection:
+        if line == '':
+            continue
+        space = line[0] and line[0] in ' \t'
+        line = line.strip()
+        if line == '' or line[0] == '#':
+            continue
+        elif space:
+            try:
+                lastLine = curSection[-1]
+                curSection[-1] = (lastLine[0],
+                                  "%s %s" % (lastLine[1], line),lastLine[2])
+            except (IndexError, TypeError):
                 raise ConfigError("Unexpected indentation at line %s" %lineno)
-            lastLine = curSection[-1]
-            curSection[-1] = (lastLine[0],
-                              "%s %s" % (lastLine[1], val),lastLine[2])
+        elif line[0] == '[':
+            m = _section_re.match(line)
+            curSection = [ ]
+            sections.append( (m.group(1), curSection) )
         else:
-            assert type is None
-            if restrict:
-                raise ConfigError("Empty line not allowed at line %s"%lineno)
+            m = _entry_re.match(line)
+            if not m:
+                raise ConfigError("Bad entry at line %s"%lineno)
+            try:
+                curSection.append( (m.group(1), m.group(2), lineno) )
+            except AttributeError:
+                raise ConfigError("Unknown section at line %s" % lineno)
+
+    return sections
+    
+def _readRestrictedConfigFile(contents):
+    # List of (heading, [(key, val, lineno), ...])
+    sections = []
+    # [(key, val, lineno)] for the current section.
+    curSection = None
+    # Current line number
+    lineno = 0
+
+    # Make sure all characters in the file are ASCII.
+    if not isPrintingAscii(contents):
+        raise ConfigError("Invalid characters in file")
+
+    fileLines = contents.split("\n")
+    if fileLines[-1] == '':
+        del fileLines[-1]
+
+    for line in fileLines:
+        lineno += 1
+        line = line.strip()
+        if line == '' or line[0] == '#':
+            raise ConfigError("Empty line not allowed at line %s"%lineno)
+        elif line[0] == '[':
+            m = _section_re.match(line)
+            if not m:
+                raise ConfigError("Bad section declaration at line %s"%lineno)
+            curSection = [ ]
+            sections.append( (m.group(1), curSection) )
+        else:
+            colonIdx = line.find(':')
+            if colonIdx >= 1:
+                try:
+                    curSection.append( (line[:colonIdx].strip(), 
+                                        line[colonIdx+1:].strip(), lineno) )
+                except AttributeError:
+                    raise ConfigError("Unknown section at line %s" % lineno)
+            else:
+                raise ConfigError("Bad Entry at line %s" % lineno)
+        
     return sections
 
 def _formatEntry(key,val,w=79,ind=4):
@@ -458,11 +466,7 @@
         if filename:
             self.reload()
         elif string:
-            cs = StringIO(string)
-            try:
-                self.__reload(cs)
-            finally:
-                cs.close()
+            self.__reload(None, string)
         else:
             self.clear()
 
@@ -480,14 +484,22 @@
             return
         f = open(self.fname, 'r')
         try:
-            self.__reload(f)
+            self.__reload(f, None)
         finally:
             f.close()
 
-    def __reload(self, file):
-        """As in .reload(), but takes an open file object."""
-        fileContents = file.read()
-        sections = _readConfigFile(fileContents, self._restrictFormat)
+    def __reload(self, file, fileContents):
+        """As in .reload(), but takes an open file object _or_ a string."""
+        if fileContents is None:
+            fileContents = file.read()
+            file.close()
+
+        fileContents = _abnormal_line_ending_re.sub("\n", fileContents)
+
+        if self._restrictFormat:
+            sections = _readRestrictedConfigFile(fileContents)
+        else:
+            sections = _readConfigFile(fileContents)
 
         # These will become self.(_sections,_sectionEntries,_sectionNames)
         # if we are successful.
@@ -518,8 +530,9 @@
             # Set entries from the section, searching for bad entries
             # as we go.
             for k,v,line in secEntries:
-                rule, parseFn, default = secConfig.get(k, (None,None,None))
-                if not rule:
+                try:
+                    rule, parseFn, default = secConfig[k]
+                except KeyError:
                     raise ConfigError("Unrecognized key %s on line %s" %
                                       (k, line))
 
@@ -535,38 +548,39 @@
                 entryLines.append(line)
 
                 # Insert the entry, checking for impermissible duplicates.
-                if rule in ('REQUIRE*','ALLOW*'):
-                    if section.has_key(k):
-                        section[k].append(v)
-                    else:
-                        section[k] = [v]
-                else:
-                    assert rule in ('REQUIRE', 'ALLOW')
+                if rule in ('REQUIRE', 'ALLOW'):
                     if section.has_key(k):
                         raise ConfigError("Duplicate entry for %s at line %s"
                                           % (k, line))
                     else:
                         section[k] = v
+                else:
+                    assert rule in ('REQUIRE*','ALLOW*')
+                    try:
+                        section[k].append(v)
+                    except KeyError:
+                        section[k] = [v]
 
             # Check for missing entries, setting defaults and detecting
             # missing requirements as we go.
             for k, (rule, parseFn, default) in secConfig.items():
                 if k == '__SECTION__':
                     continue
-                if rule in ('REQUIRE', 'REQUIRE*') and not section.has_key(k):
-                    raise ConfigError("Missing entry %s from section %s"
-                                      % (k, secName))
                 elif not section.has_key(k):
-                    if parseFn is None or default is None:
-                        if rule == 'ALLOW*':
-                            section[k] = []
+                    if rule in ('REQUIRE', 'REQUIRE*'):
+                        raise ConfigError("Missing entry %s from section %s"
+                                          % (k, secName))
+                    else:                   
+                        if parseFn is None or default is None:
+                            if rule == 'ALLOW*':
+                                section[k] = []
+                            else:
+                                section[k] = default
+                        elif rule == 'ALLOW':
+                            section[k] = parseFn(default)
                         else:
-                            section[k] = default
-                    elif rule == 'ALLOW':
-                        section[k] = parseFn(default)
-                    else:
-                        assert rule == 'ALLOW*'
-                        section[k] = map(parseFn,default)
+                            assert rule == 'ALLOW*'
+                            section[k] = map(parseFn,default)
 
             cb = self._callbacks.get(secName, None)
             if cb:

Index: ServerInfo.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/ServerInfo.py,v
retrieving revision 1.26
retrieving revision 1.27
diff -u -d -r1.26 -r1.27
--- ServerInfo.py	16 Dec 2002 02:40:11 -0000	1.26
+++ ServerInfo.py	29 Dec 2002 20:46:54 -0000	1.27
@@ -10,6 +10,8 @@
 
 __all__ = [ 'ServerInfo' ]
 
+import re
+
 import mixminion.Config
 import mixminion.Crypto
 
@@ -170,6 +172,8 @@
        no values."""
     return _getServerInfoDigestImpl(info, rsa)
 
+_trailing_whitespace_re = re.compile(r'[ \t]+$', re.M)
+_special_line_re = re.compile(r'^(?:Digest|Signature):.*$', re.M)
 def _getServerInfoDigestImpl(info, rsa=None):
     """Helper method.  Calculates the correct digest of a server descriptor
        (as provided in a string).  If rsa is provided, signs the digest and
@@ -177,25 +181,15 @@
 
     # The algorithm's pretty easy.  We just find the Digest and Signature
     # lines, replace each with an 'Empty' version, and calculate the digest.
-    infoLines = info.split("\n")
-    if not infoLines[0] == "[Server]":
+    info = _trailing_whitespace_re.sub("", info)
+    if not info.startswith("[Server]"):
         raise ConfigError("Must begin with server section")
-    digestLine = None
-    signatureLine = None
-    infoLines = info.split("\n")
-    for lineNo in range(len(infoLines)):
-        line = infoLines[lineNo]
-        if line.startswith("Digest:") and digestLine is None:
-            digestLine = lineNo
-        elif line.startswith("Signature:") and signatureLine is None:
-            signatureLine = lineNo
-
-    assert digestLine is not None and signatureLine is not None
-
-    infoLines[digestLine] = 'Digest:'
-    infoLines[signatureLine] = 'Signature:'
-    info = "\n".join(infoLines)
-
+    def replaceFn(s):
+        if s.group(0)[0] == 'D':
+            return "Digest:"
+        else:
+            return "Signature:"
+    info = _special_line_re.sub(replaceFn, info)
     digest = mixminion.Crypto.sha1(info)
 
     if rsa is None:
@@ -205,8 +199,11 @@
     signature = mixminion.Crypto.pk_sign(digest,rsa)
     digest = formatBase64(digest)
     signature = formatBase64(signature)
-    infoLines[digestLine] = 'Digest: '+digest
-    infoLines[signatureLine] = 'Signature: '+signature
-
-    return "\n".join(infoLines)
+    def replaceFn2(s, digest=digest, signature=signature):
+        if s.group(0)[0] == 'D':
+            return "Digest: "+digest
+        else:
+            return "Signature: "+signature
 
+    info = _special_line_re.sub(replaceFn2, info)
+    return info

Index: benchmark.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/benchmark.py,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -d -r1.19 -r1.20
--- benchmark.py	16 Dec 2002 02:40:11 -0000	1.19
+++ benchmark.py	29 Dec 2002 20:46:54 -0000	1.20
@@ -13,21 +13,23 @@
 __pychecker__ = 'no-funcdoc no-reimport'
 __all__ = [ 'timeAll', 'testLeaks1', 'testLeaks2' ]
 
-
 import os
 import stat
+import cPickle
 from time import time
 
 import mixminion._minionlib as _ml
 from mixminion.BuildMessage import _buildHeader, buildForwardMessage
 from mixminion.Common import secureDelete, installSignalHandlers, \
-     waitForChildren
+     waitForChildren, formatBase64
 from mixminion.Crypto import *
 from mixminion.Crypto import OAEP_PARAMETER
 from mixminion.Crypto import _add_oaep_padding, _check_oaep_padding
 from mixminion.Packet import SMTP_TYPE
+from mixminion.ServerInfo import ServerInfo
 from mixminion.server.HashLog import HashLog
 from mixminion.server.PacketHandler import PacketHandler
+from mixminion.server.ServerConfig import ServerConfig
 from mixminion.test import FakeServerInfo
 from mixminion.testSupport import mix_mktemp
 
@@ -244,7 +246,20 @@
     print "Pad+RSA private decrypt", \
           timeit((lambda enc=enc,rsa=rsa: pk_decrypt(enc, rsa)),100)
 
-
+    print "RSA.get_public_key", timeit(rsa.get_public_key, 100)
+    print "RSA.get_exponent", timeit(rsa.get_exponent, 100)
+    print "RSA.get_modulus_bytes", timeit(rsa.get_modulus_bytes, 10000)
+    print "RSA.encode_key(public)", \
+          timeit(lambda rsa=rsa: rsa.encode_key(1), 100)
+    print "RSA.encode_key(private)", \
+          timeit(lambda rsa=rsa: rsa.encode_key(0), 100)
+    modulus = rsa.get_public_key()[0]
+    print "RSA from modulus", \
+          timeit(lambda modulus=modulus: pk_from_modulus(modulus), 10000)
+    asn1 = rsa.encode_key(1)
+    print "RSA from ASN1 (public)", \
+          timeit(lambda asn1=asn1: pk_decode_public_key(asn1), 10000)
+          
     print "RSA generate (1024 bit,e=65535)", timeit((lambda: pk_generate(1024,
                                                                   65535)),10)
     rsa = pk_generate()
@@ -340,11 +355,55 @@
         size += os.stat(fname+suffix)[stat.ST_SIZE]
 
     print "File size (%s entries)"%load, spacestr(size)
-
+    
+#----------------------------------------------------------------------
+def directoryTiming():
+    print "#========== DESCRIPTORS AND DIRECTORIES =============="
+    from mixminion.server.ServerKeys import ServerKeyring, \
+         generateServerDescriptorAndKeys
+    gen = generateServerDescriptorAndKeys
+    homedir = mix_mktemp()
+    confStr = """
+[Server]
+EncryptIdentityKey: no
+PublicKeyLifetime: 1 day
+EncryptPrivateKey: no
+Homedir: %s
+Mode: relay
+Nickname: The Server
+Contact-Email: a@b.c
+[Incoming/MMTP]
+Enabled: yes
+IP: 1.1.1.1
+""" % mix_mktemp()
+    config = ServerConfig(string=confStr)
+    keyring = ServerKeyring(config)
+    keyring.getIdentityKey()
+    print "Create and sign server descriptor", timeit(keyring.createKeys, 10)
+    liveKey = keyring.getServerKeyset()
+    descFile = liveKey.getDescriptorFileName()
+    desc = open(descFile).read()
+##     for _ in xrange(2000):
+##         ServerInfo(string=desc, assumeValid=0)
+##     if 1: return
+    
+    print "Parse server descriptor (no validation)", \
+          timeit(lambda desc=desc: ServerInfo(string=desc,assumeValid=1),
+                 400)
+    print "Parse server descriptor (full validation)", \
+          timeit(lambda desc=desc: ServerInfo(string=desc,assumeValid=0),
+                 400)
+    info = ServerInfo(string=desc)
+    dbin = cPickle.dumps(info, 1)
+    print "Unpickle binary-pickled descriptor (%s/%s)"%(len(dbin),len(desc)), \
+          timeit(lambda dbin=dbin: cPickle.loads(dbin), 400)
+    dtxt = cPickle.dumps(info, 0)
+    print "Unpickle text-pickled descriptor (%s/%s)"%(len(dtxt),len(desc)), \
+          timeit(lambda dtxt=dtxt: cPickle.loads(dtxt), 400)
+    
 #----------------------------------------------------------------------
 
 def buildMessageTiming():
-
     print "#================= BUILD MESSAGE ====================="
     pk = pk_generate()
     payload = ("Junky qoph flags vext crwd zimb."*1024)[:22*1024]
@@ -589,11 +648,17 @@
             _ml.rsa_make_public_key(n,e)
 
 #----------------------------------------------------------------------
+import base64
+import binascii
 
 def timeAll(name, args):
     cryptoTiming()
     buildMessageTiming()
-    hashlogTiming()
+    directoryTiming()
     fileOpsTiming()
     serverProcessTiming()
+    hashlogTiming()
     timeEfficiency()
+    #import profile
+    #profile.run("import mixminion.benchmark; mixminion.benchmark.directoryTiming()")
+