[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[minion-cvs] Make hash logs more robust with synchronous journals
Update of /home/minion/cvsroot/src/minion/lib/mixminion
In directory moria.seul.org:/tmp/cvs-serv2601/lib/mixminion
Modified Files:
HashLog.py benchmark.py test.py
Log Message:
Make hash logs more robust with synchronous journals
Index: HashLog.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/HashLog.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- HashLog.py 10 Sep 2002 14:45:30 -0000 1.11
+++ HashLog.py 2 Dec 2002 20:18:44 -0000 1.12
@@ -8,13 +8,14 @@
import os
import anydbm, dumbdbm
from mixminion.Common import MixFatalError, getLog, createPrivateDir
+from mixminion.Packet import DIGEST_LEN
__all__ = [ 'HashLog' ]
# FFFF Mechanism to force a different default db module.
-# FFFF Journaling for dbs that don't recover from catastrophic failure during
-# FFFF writes.
+# FFFF two-copy journaling to protect against catastrophic failure that
+# FFFF underlying DB code can't handle.
class HashLog:
"""A HashLog is a file containing a list of message digests that we've
@@ -38,6 +39,16 @@
The base HashLog implementation assumes an 8-bit-clean database that
maps strings to strings."""
+ ##
+ # Internally, we also keep a flat 'journal' file to which we append
+ # values that we've seen but not yet written to the database. This way
+ # we can survive crashes between 'logHash' and 'sync'.
+ #
+ # Fields:
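+ # log: the database object holding hashes that have already been synced
+ # journalFileName: path of the journal file (the log filename + "_jrnl")
+ # journalFile: OS-level file descriptor (from os.open) for the journal
+ # journal: dict mapping each hash journaled since the last sync to 1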
def __init__(self, filename, keyid):
"""Create a new HashLog to store data in 'filename' for the key
'keyid'."""
@@ -52,9 +63,23 @@
except KeyError:
self.log["KEYID"] = keyid
+ self.journalFileName = filename+"_jrnl"
+ self.journal = {}
+ if os.path.exists(self.journalFileName):
+ f = open(self.journalFileName, 'r')
+ j = f.read()
+ for i in xrange(0, len(j), DIGEST_LEN):
+ self.journal[j[i:i+DIGEST_LEN]] = 1
+ f.close()
+
+ self.journalFile = os.open(self.journalFileName,
+ os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_SYNC, 0700)
+
def seenHash(self, hash):
"""Return true iff 'hash' has been logged before."""
try:
+ if self.journal.get(hash,0):
+ return 1
_ = self.log[hash]
return 1
except KeyError:
@@ -62,15 +87,26 @@
def logHash(self, hash):
"""Insert 'hash' into the database."""
- self.log[hash] = "1"
+ assert len(hash) == DIGEST_LEN
+ self.journal[hash] = 1
+ #self.journalFile.write(hash)
+ os.write(self.journalFile, hash)
def sync(self):
"""Flushes changes to this log to the filesystem."""
+ for hash in self.journal.keys():
+ self.log[hash] = "1"
if hasattr(self.log, "sync"):
self.log.sync()
+ os.close(self.journalFile)
+ self.journalFile = os.open(self.journalFileName,
+ os.O_WRONLY|os.O_CREAT|os.O_TRUNC|os.O_SYNC, 0700)
+ self.journal = {}
def close(self):
"""Closes this log."""
self.sync()
self.log.close()
+ os.close(self.journalFile)
+
Index: benchmark.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/benchmark.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- benchmark.py 2 Dec 2002 03:30:07 -0000 1.13
+++ benchmark.py 2 Dec 2002 20:18:44 -0000 1.14
@@ -288,6 +288,7 @@
def _hashlogTiming(fname, load):
from mixminion.HashLog import HashLog
+ # Try more realistic access patterns.
prng = AESCounterPRNG("a"*16)
print "Testing hash log (%s entries)"%load
@@ -299,6 +300,7 @@
t = time()
for n in xrange(len(hashes)):
h.logHash(hashes[n])
+ h.sync()
t = time()-t
print "Add entry (up to %s entries)" %load, timestr(t/float(load))
Index: test.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/test.py,v
retrieving revision 1.40
retrieving revision 1.41
diff -u -d -r1.40 -r1.41
--- test.py 2 Dec 2002 10:13:49 -0000 1.40
+++ test.py 2 Dec 2002 20:18:44 -0000 1.41
@@ -805,47 +805,47 @@
notseen("\000"*10)
notseen("\000")
notseen("\277"*10)
- log("a")
+ log("a"*20)
notseen("a*10")
notseen("\000"*10)
notseen("b")
- seen("a")
+ seen("a"*20)
- log("b")
- seen("b")
- seen("a")
+ log("b"*20)
+ seen("b"*20)
+ seen("a"*20)
- log("\000")
- seen("\000")
+ log("\000"*20)
+ seen("\000"*20)
notseen("\000"*10)
- log("\000"*10)
- seen("\000"*10)
+ log("\000"*20)
+ seen("\000"*20)
log("\277"*20)
seen("\277"*20)
- log("abcdef"*4)
- seen("abcdef"*4)
+ log("abcde"*4)
+ seen("abcde"*4)
h[0].close()
h[0] = HashLog(fname, "Xyzzy")
- seen("a")
- seen("b")
+ seen("a"*20)
+ seen("b"*20)
seen("\277"*20)
- seen("abcdef"*4)
- seen("\000")
- seen("\000"*10)
+ seen("abcde"*4)
+ seen("\000"*20)
notseen(" ")
notseen("\000"*5)
+ notseen("\001"*20)
- notseen("ddddd")
- log("ddddd")
- seen("ddddd")
+ notseen("ddddd"*4)
+ log("ddddd"*4)
+ seen("ddddd"*4)
h[0].close()
h[0] = HashLog(fname, "Xyzzy")
- seen("ddddd")
+ seen("ddddd"*4)
h[0].close()