[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[minion-cvs] First cut of generic s-expression module. This will be ...
Update of /home/minion/cvsroot/src/minion/lib/mixminion
In directory moria:/tmp/cvs-serv16395/lib/mixminion
Added Files:
SExpression.py
Log Message:
First cut of generic s-expression module. This will be the new format for several things. Hopefully, the nymbaron folk can use this instead of having to fork their own. It should be usable by other projects too.
--- NEW FILE: SExpression.py ---
# Copyright 2005 Nick Mathewson. See LICENSE for licensing information.
# $Id: SExpression.py,v 1.1 2005/12/04 00:05:33 nickm Exp $
"""mixminion.SExpression
Implementation of Rivest's s-expressions as defined at
http://theory.lcs.mit.edu/~rivest/sexp.txt
and as used in SPKI and and SDSI.
Mixminion doesn't use some of the features here; this is intended
as a general-use pure-python implementation of the "proposed" SExpr
standard.
The functions below, we represent an s-epression as a set of nested
sequences of strings. For example, the s-expression
(hello world (1 2 3) x)
may be represented in python as
("hello", "world", ["1", "2", "3"], "x")
or as
("hello", "world", ("1", "2", "3"), "x")
Strings with 'display hints' are encoded as instances of the
DisplayHint class below.
"""
__all__ = [ "DisplayHint",
"write_canonical", "parse_canonical", "encode_canonical",
"write_pretty", "parse", "encode_pretty"
]
import base64
import binascii
import re
import sys
import types
class DisplayHint:
def __init__(self, hint, s):
self.hint = hint
self.s = s
def getHint(self):
return self.hint
def __str__(self):
return self.s
def _writeRaw(write,s):
write(str(len(s)))
write(":")
write(s)
_TOKEN_RE = re.compile(r"[a-zA-Z\-\.\/\_\:\*\+\=][a-zA-Z0-9\-\.\/\_\:\*\+\=]*")
def _writeToken(write,s):
assert _TOKEN_RE.match(s)
write(s)
def _writeCanon_atom(write, rep):
if isinstance(rep, types.StringType):
_writeRaw(write, rep)
return 1
try:
getHint = rep.getHint
except AttributeError:
return 0
else:
write("[")
_writeRaw(write,getHint())
write("]")
_writeRaw(str(rep))
return 1
def write_canonical(write, rep):
if _writeCanon_atom(write, rep):
return
stack = []
idx = 0
write("(")
while 1:
while idx == len(rep):
write(")")
try:
rep,idx = stack.pop()
except IndexError:
return
if _writeCanon_atom(write,rep[idx]):
idx += 1
continue
stack.append((rep,idx+1))
rep = rep[idx]
idx = 0
write("(")
def _enc_raw(rep):
if isinstance(rep, types.StringType):
return "%s:%s"%(len(rep), rep)
try:
getHint = rep.getHint
except AttributeError:
return None
else:
hint = getHint()
return "[%s:%s]%s:%s"%(len(hint),hint,len(rep),rep)
def encode_canonical(rep):
s = _enc_raw(rep)
if s is not None:
return s
stack = []
idx = 0
result = [ "(" ]
add = result.append
while 1:
while idx == len(rep):
add(")")
try:
rep,idx = stack.pop()
except IndexError:
return "".join(result)
s = _enc_raw(rep[idx])
if s is not None:
add(s)
idx += 1
continue
stack.append((rep,idx+1))
rep = rep[idx]
idx = 0
add("(")
_RAW_PAT = re.compile(r'^[1-9][0-9]*:')
def _parseRaw_s(s, hint=0):
if not hint and s[0] == '[':
hint, rest = _parseRaw_s(s[1:], 1)
if rest[:1] != "]":
raise FormatError
s, rest = _parseRaw_s(rest[1:], 1)
return DisplayHint(hint, s), rest
m = _RAW_PAT.match(s)
if not m:
raise FormatError()
lngth = int(m.group(0)[:-1])
s = s[m.end():]
if len(s)<lngth:
raise FormatError()
return s[:lngth], s[lngth:]
class FormatError(Exception):
pass
def parse_canonical(s):
cur = top = []
stack = []
while s:
first = s[0]
if first not in '()':
item, s = _parseRaw_s(s)
cur.append(item)
continue
elif first == '(':
next = []
cur.append(next)
stack.append(cur)
cur = next
s = s[1:]
else:
try:
cur = stack.pop()
except IndexError:
raise FormatError()
s = s[1:]
if stack or len(top) != 1:
raise FormatError()
return top[0]
_TOKEN_RE = re.compile(r'''
\s*(?:
([0-9]+:) | # Raw string: 1
([a-zA-Z0-9\-\.\/\_\:\*\+\=]
[a-zA-Z0-9\-\.\/\_\:\*\+\=0-9]*) | # Token: 2
(?:([0-9]*)\#([a-fA-F0-9\s]*)\#) | # Hex string: 3,4
(?:([0-9]*)\|([a-zA-Z0-9\+\/\=\s]*)\|) | # Base 64: 5,6
(?:([0-9]*)\"((?:[^\\\"]+|\\.)*)\") | # Quoted: 7,8
(\() | # Open paren: 9
(\)) | # Close pare: 10
(\[) # Start of a display hint: 11
)
''', re.X+re.DOTALL)
_QUOTED_CHAR_RE = re.compile(r'''
\\\\ | \\[abtnvfr] | \\ \r \n ? | \\ \n \r ? |
\\x[A-Fa-f0-9]{2} | \\[0-7]{1,3}
''', re.X)
_QUOTED_CHAR_MAP = { 'a' : '\a',
'b' : '\b',
't' : '\t',
'n' : '\n',
'v' : '\v',
'f' : '\f',
'r' : '\r',
}
def _unescape_quoted_char(m):
s = m.group(0)
try:
return _QUOTED_CHAR_MAP[s[1]]
except KeyError:
pass
if s[1] in "\n\r":
return ""
elif s[1] == 'x':
return chr(int(s[2:],16))
else:
return chr(int(s[1:],8))
def _parse_tok(s):
"s -> (string or 1[(] or 2[)], rest) "
m = _TOKEN_RE.match(s)
if not m:
if s.isspace():
return None, None
else:
raise FormatError()
rest = s[m.end():]
g = m.groups()
if g[1]: # raw string
ln = int(g[1][:-1])
return rest[:ln], rest[ln:]
elif g[2]: # token
return g[2], rest
elif g[4]: # Hex string
r = binascii.a2b_hex(g[4])
if g[3]:
ln = int(g[3])
if ln != len(r):
raise FormatError()
return r, rest
elif g[6]: # Base64 string
m4 = len(g[6])%4
if m4:
g[6] += ("="*(4-m4))
r = binascii.a2b_base64(g[6])
if g[5]:
ln = int(g[5])
if ln != len(r):
raise FormatError()
return r, rest
elif g[8]: # Quoted string
r = _QUOTED_CHAR_RE.sub(_unescape_quoted_char, g[8])
if g[7]:
ln = int(g[7])
if ln != len(r):
raise FormatError()
return r, rest
elif g[9]:
return 1, rest
elif g[10]:
return 2, rest
else:
assert s == "["
hint, rest = _parse_tok(rest)
if hint in (1,2,None) or isinstance(hint, DisplayHint):
raise FormatError()
try:
idx = rest.find("]")
except ValueError:
raise FormatError()
if rest[:idx-1] and not rest[:idx-1].isspace():
raise FormatError()
s, rest = _parse_tok(rest[idx:])
if s in (1,2,None) or isinstance(hint, DisplayHint):
raise FormatError()
return DisplayHint(hint, s), rest
def parse(s):
cur = top = []
stack = []
while 1:
tok, s = _parse_tok(s)
if tok == 1:
next = []
cur.append(next)
stack.append(cur)
cur = next
elif tok == 2:
try:
cur = stack.pop()
except IndexError:
raise FormatError()
elif tok == None:
if stack or len(top) != 1:
raise FormatError()
return top[0]
else:
cur.append(tok)
_STR_RE = re.compile(r'''
([a-zA-Z0-9\-\.\/\_\:\*\+\=]
[a-zA-Z0-9\-\.\/\_\:\*\+\=0-9]*) | # Could be a token.
([\a\b\t\n\v\f\r\x20-\x7E]+) # Could be quoted.
''', re.X)
_QUOTED_MAP = { '\b' : "\\b",
'\t' : "\\t",
'\v' : "\\v",
'\n' : "\\n",
'\f' : "\\f",
'\r' : "\\r",
'"' : "\"",
'\b' : "\\b",
'\\' : "\\", }
for x in xrange(128):
if 32 <= x <= 126:
_QUOTED_MAP[chr(x)] = chr(x)
elif not _QUOTED_MAP.has_key(chr(x)):
_QUOTED_MAP[chr(x)] = "\\x%02x"%x
del x
def _writeQuoted(write,s):
m = _QUOTED_MAP
write('"')
for ch in s:
f.write(m[ch])
write('"')
def _write_str(write, s, indent, hint=0):
if not s:
write('""')
return
m = _STR_RE.match(s)
if m.group(1):
write(s)
elif m.group(2):
_writeQuoted(write,s)
else:
ind = " "*indent
write("\n%s#"%ind)
for i in xrange(0, len(s), 30):
write(binascii.b2a_hex(s[i:i+30]))
if i+30 < len(s):
write("\n%s "%ind)
if hint:
write("#")
else:
write("#\n%s"%ind)
def _write_atom(write, s, indent):
if isinstance(s, types.StringType):
_write_str(write, s, indent,0)
return 1
try:
getHint = s.getHint
except AttributeError:
return 0
else:
write("[")
_write_str(write,getHint(),indent,1)
write("]")
_write_str(write,str(s),indent,0)
return 1
def write_pretty(write, rep, indent_step=1):
if _write_atom(write, rep, 0):
return
stack = []
idx = 0
indent = 0
write("(")
while 1:
while idx == len(rep):
write(")")
indent -= indent_step
try:
rep,idx = stack.pop()
except IndexError:
write("\n")
return
if _write_atom(write,rep[idx], indent):
idx += 1
if idx < len(rep):
write(" ")
continue
stack.append((rep,idx+1))
rep = rep[idx]
idx = 0
indent += indent_step
write("\n%s("%(" "*indent))
write_pretty(sys.stdout.write, [["abc","d","ef",["1"]]])