[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[minion-cvs] First cut of generic s-expression module. This will be ...



Update of /home/minion/cvsroot/src/minion/lib/mixminion
In directory moria:/tmp/cvs-serv16395/lib/mixminion

Added Files:
	SExpression.py 
Log Message:
First cut of generic s-expression module. This will be the new format for several things.  Hopefully, the nymbaron folk can use this instead of having to fork their own.  It should be usable by other projects too.

--- NEW FILE: SExpression.py ---
# Copyright 2005 Nick Mathewson.  See LICENSE for licensing information.
# $Id: SExpression.py,v 1.1 2005/12/04 00:05:33 nickm Exp $

"""mixminion.SExpression

   Implementation of Rivest's s-expressions as defined at
         http://theory.lcs.mit.edu/~rivest/sexp.txt
   and as used in SPKI and and SDSI.

   Mixminion doesn't use some of the features here; this is intended
   as a general-use pure-python implementation of the "proposed" SExpr
   standard.

   The functions below, we represent an s-epression as a set of nested
   sequences of strings.  For example, the s-expression
        (hello world (1 2 3) x)
   may be represented in python as
        ("hello", "world", ["1", "2", "3"], "x")
   or as
        ("hello", "world", ("1", "2", "3"), "x")

   Strings with 'display hints' are encoded as instances of the
   DisplayHint class below.

"""

__all__ = [ "DisplayHint",
            "write_canonical", "parse_canonical", "encode_canonical",
            "write_pretty", "parse", "encode_pretty"
            ]

import base64
import binascii
import re
import sys
import types

class DisplayHint:
    def __init__(self, hint, s):
        self.hint = hint
        self.s = s
    def getHint(self):
        return self.hint
    def __str__(self):
        return self.s

def _writeRaw(write,s):
    write(str(len(s)))
    write(":")
    write(s)

_TOKEN_RE = re.compile(r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*")
def _writeToken(write,s):
    assert _TOKEN_RE.match(s)
    write(s)

def _writeCanon_atom(write, rep):
    if isinstance(rep, types.StringType):
        _writeRaw(write, rep)
        return 1
    try:
        getHint = rep.getHint
    except AttributeError:
        return 0
    else:
        write("[")
        _writeRaw(write,getHint())
        write("]")
        _writeRaw(str(rep))
        return 1

def write_canonical(write, rep):
    if _writeCanon_atom(write, rep):
        return

    stack = []
    idx = 0
    write("(")
    while 1:
        while idx == len(rep):
            write(")")
            try:
                rep,idx = stack.pop()
            except IndexError:
                return
        if _writeCanon_atom(write,rep[idx]):
            idx += 1
            continue
        stack.append((rep,idx+1))
        rep = rep[idx]
        idx = 0
        write("(")

def _enc_raw(rep):
    if isinstance(rep, types.StringType):
        return "%s:%s"%(len(rep), rep)
    try:
        getHint = rep.getHint
    except AttributeError:
        return None
    else:
        hint = getHint()
        return "[%s:%s]%s:%s"%(len(hint),hint,len(rep),rep)

def encode_canonical(rep):
    s = _enc_raw(rep)
    if s is not None:
        return s
    stack = []
    idx = 0
    result = [ "(" ]
    add = result.append
    while 1:
        while idx == len(rep):
            add(")")
            try:
                rep,idx = stack.pop()
            except IndexError:
                return "".join(result)
        s = _enc_raw(rep[idx])
        if s is not None:
            add(s)
            idx += 1
            continue
        stack.append((rep,idx+1))
        rep = rep[idx]
        idx = 0
        add("(")

_RAW_PAT = re.compile(r'^[1-9][0-9]*:')

def _parseRaw_s(s, hint=0):
    if not hint and s[0] == '[':
        hint, rest = _parseRaw_s(s[1:], 1)
        if rest[:1] != "]":
            raise FormatError
        s, rest = _parseRaw_s(rest[1:], 1)
        return DisplayHint(hint, s), rest
    m = _RAW_PAT.match(s)
    if not m:
        raise FormatError()
    lngth = int(m.group(0)[:-1])
    s = s[m.end():]
    if len(s)<lngth:
        raise FormatError()
    return s[:lngth], s[lngth:]

class FormatError(Exception):
    pass

def parse_canonical(s):
    cur = top = []
    stack = []
    while s:
        first = s[0]
        if first not in '()':
            item, s = _parseRaw_s(s)
            cur.append(item)
            continue
        elif first == '(':
            next = []
            cur.append(next)
            stack.append(cur)
            cur = next
            s = s[1:]
        else:
            try:
                cur = stack.pop()
            except IndexError:
                raise FormatError()
            s = s[1:]
    if stack or len(top) != 1:
        raise FormatError()
    return top[0]

_TOKEN_RE = re.compile(r'''
  \s*(?:
        ([0-9]+:)                               | # Raw string: 1
        ([a-zA-Z0-9\-\.\/\_\:\*\+\=]
         [a-zA-Z0-9\-\.\/\_\:\*\+\=0-9]*)       | # Token: 2
        (?:([0-9]*)\#([a-fA-F0-9\s]*)\#)        | # Hex string: 3,4
        (?:([0-9]*)\|([a-zA-Z0-9\+\/\=\s]*)\|)  | # Base 64: 5,6
        (?:([0-9]*)\"((?:[^\\\"]+|\\.)*)\")     | # Quoted: 7,8
        (\()                                    | # Open paren: 9
        (\))                                    | # Close pare: 10
        (\[)                                      # Start of a display hint: 11
     )
   ''', re.X+re.DOTALL)

_QUOTED_CHAR_RE = re.compile(r'''
     \\\\ | \\[abtnvfr] | \\ \r \n ? | \\ \n \r ? |
     \\x[A-Fa-f0-9]{2} | \\[0-7]{1,3}
   ''', re.X)

_QUOTED_CHAR_MAP =  { 'a' : '\a',
                      'b' : '\b',
                      't' : '\t',
                      'n' : '\n',
                      'v' : '\v',
                      'f' : '\f',
                      'r' : '\r',
                      }


def _unescape_quoted_char(m):
    s = m.group(0)
    try:
        return _QUOTED_CHAR_MAP[s[1]]
    except KeyError:
        pass
    if s[1] in "\n\r":
        return ""
    elif s[1] == 'x':
        return chr(int(s[2:],16))
    else:
        return chr(int(s[1:],8))

def _parse_tok(s):
    "s -> (string or 1[(] or 2[)], rest) "
    m = _TOKEN_RE.match(s)
    if not m:
        if s.isspace():
            return None, None
        else:
            raise FormatError()
    rest = s[m.end():]
    g = m.groups()
    if g[1]: # raw string
        ln = int(g[1][:-1])
        return rest[:ln], rest[ln:]
    elif g[2]: # token
        return g[2], rest
    elif g[4]: # Hex string
        r = binascii.a2b_hex(g[4])
        if g[3]:
            ln = int(g[3])
            if ln != len(r):
                raise FormatError()
        return r, rest
    elif g[6]: # Base64 string
        m4 = len(g[6])%4
        if m4:
            g[6] += ("="*(4-m4))
        r = binascii.a2b_base64(g[6])
        if g[5]:
            ln = int(g[5])
            if ln != len(r):
                raise FormatError()
        return r, rest
    elif g[8]: # Quoted string
        r = _QUOTED_CHAR_RE.sub(_unescape_quoted_char, g[8])
        if g[7]:
            ln = int(g[7])
            if ln != len(r):
                raise FormatError()
        return r, rest
    elif g[9]:
        return 1, rest
    elif g[10]:
        return 2, rest
    else:
        assert s == "["
        hint, rest = _parse_tok(rest)
        if hint in (1,2,None) or isinstance(hint, DisplayHint):
            raise FormatError()
        try:
            idx = rest.find("]")
        except ValueError:
            raise FormatError()
        if rest[:idx-1] and not rest[:idx-1].isspace():
            raise FormatError()
        s, rest = _parse_tok(rest[idx:])
        if s in (1,2,None) or isinstance(hint, DisplayHint):
            raise FormatError()
        return DisplayHint(hint, s), rest

def parse(s):
    cur = top = []
    stack = []
    while 1:
        tok, s = _parse_tok(s)
        if tok == 1:
            next = []
            cur.append(next)
            stack.append(cur)
            cur = next
        elif tok == 2:
            try:
                cur = stack.pop()
            except IndexError:
                raise FormatError()
        elif tok == None:
            if stack or len(top) != 1:
                raise FormatError()
            return top[0]
        else:
            cur.append(tok)

_STR_RE = re.compile(r'''
    ([a-zA-Z0-9\-\.\/\_\:\*\+\=]
     [a-zA-Z0-9\-\.\/\_\:\*\+\=0-9]*) | # Could be a token.
    ([\a\b\t\n\v\f\r\x20-\x7E]+)        # Could be quoted.
  ''', re.X)

_QUOTED_MAP = { '\b' : "\\b",
                '\t' : "\\t",
                '\v' : "\\v",
                '\n' : "\\n",
                '\f' : "\\f",
                '\r' : "\\r",
                '"'  : "\"",
                '\b' : "\\b",
                '\\' : "\\", }
for x in xrange(128):
    if 32 <= x <= 126:
        _QUOTED_MAP[chr(x)] = chr(x)
    elif not _QUOTED_MAP.has_key(chr(x)):
        _QUOTED_MAP[chr(x)] = "\\x%02x"%x
del x

def _writeQuoted(write,s):
    m = _QUOTED_MAP
    write('"')
    for ch in s:
        f.write(m[ch])
    write('"')

def _write_str(write, s, indent, hint=0):
    if not s:
        write('""')
        return
    m = _STR_RE.match(s)
    if m.group(1):
        write(s)
    elif m.group(2):
        _writeQuoted(write,s)
    else:
        ind = " "*indent
        write("\n%s#"%ind)
        for i in xrange(0, len(s), 30):
            write(binascii.b2a_hex(s[i:i+30]))
            if i+30 < len(s):
                write("\n%s "%ind)
        if hint:
            write("#")
        else:
            write("#\n%s"%ind)

def _write_atom(write, s, indent):
    if isinstance(s, types.StringType):
        _write_str(write, s, indent,0)
        return 1
    try:
        getHint = s.getHint
    except AttributeError:
        return 0
    else:
        write("[")
        _write_str(write,getHint(),indent,1)
        write("]")
        _write_str(write,str(s),indent,0)
        return 1


def write_pretty(write, rep, indent_step=1):
    if _write_atom(write, rep, 0):
        return

    stack = []
    idx = 0
    indent = 0
    write("(")
    while 1:
        while idx == len(rep):
            write(")")
            indent -= indent_step
            try:
                rep,idx = stack.pop()
            except IndexError:
                write("\n")
                return
        if _write_atom(write,rep[idx], indent):
            idx += 1
            if idx < len(rep):
                write(" ")
            continue
        stack.append((rep,idx+1))
        rep = rep[idx]
        idx = 0
        indent += indent_step
        write("\n%s("%(" "*indent))

write_pretty(sys.stdout.write, [["abc","d","ef",["1"]]])