[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[minion-cvs] Make hash logs more robust with synchronous journals



Update of /home/minion/cvsroot/src/minion/lib/mixminion
In directory moria.seul.org:/tmp/cvs-serv2601/lib/mixminion

Modified Files:
	HashLog.py benchmark.py test.py 
Log Message:
Make hash logs more robust with synchronous journals

Index: HashLog.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/HashLog.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- HashLog.py	10 Sep 2002 14:45:30 -0000	1.11
+++ HashLog.py	2 Dec 2002 20:18:44 -0000	1.12
@@ -8,13 +8,14 @@
 import os
 import anydbm, dumbdbm
 from mixminion.Common import MixFatalError, getLog, createPrivateDir
+from mixminion.Packet import DIGEST_LEN
 
 __all__ = [ 'HashLog' ]
 
 # FFFF Mechanism to force a different default db module.
 
-# FFFF Journaling for dbs that don't recover from catastrophic failure during
-# FFFF writes.
+# FFFF two-copy journaling to protect against catastrophic failure that
+# FFFF underlying DB code can't handle.
 
 class HashLog:
     """A HashLog is a file containing a list of message digests that we've
@@ -38,6 +39,16 @@
 
        The base HashLog implementation assumes an 8-bit-clean database that
        maps strings to strings."""
+    ##
+    # Internally, we also keep a flat 'journal' file to which we append
+    # values that we've seen but not yet written to the database.  This way
+    # we can survive crashes between 'logHash' and 'sync'.
+    #
+    # Fields:
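+    #   log: the underlying string-to-string database of logged digests
+    #   journalFileName: path of the journal file (filename + "_jrnl")
+    #   journalFile: file descriptor (from os.open) used to write the journal
+    #   journal: dict mapping each digest held in the journal file to 1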
     def __init__(self, filename, keyid):
         """Create a new HashLog to store data in 'filename' for the key
            'keyid'."""
@@ -52,9 +63,23 @@
         except KeyError:
             self.log["KEYID"] = keyid
 
+	self.journalFileName = filename+"_jrnl"
+	self.journal = {}
+	if os.path.exists(self.journalFileName):
+	    f = open(self.journalFileName, 'r')
+	    j = f.read()
+	    for i in xrange(0, len(j), DIGEST_LEN):
+		self.journal[j[i:i+DIGEST_LEN]] = 1
+	    f.close()
+
+	self.journalFile = os.open(self.journalFileName, 
+		    os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_SYNC, 0700)
+
     def seenHash(self, hash):
         """Return true iff 'hash' has been logged before."""
         try:
+	    if self.journal.get(hash,0):
+		return 1
             _ = self.log[hash]
             return 1
         except KeyError:
@@ -62,15 +87,26 @@
 
     def logHash(self, hash):
         """Insert 'hash' into the database."""
-        self.log[hash] = "1"
+	assert len(hash) == DIGEST_LEN
+	self.journal[hash] = 1
+	#self.journalFile.write(hash)
+	os.write(self.journalFile, hash)
 
     def sync(self):
         """Flushes changes to this log to the filesystem."""
+	for hash in self.journal.keys():
+	    self.log[hash] = "1"
         if hasattr(self.log, "sync"):
             self.log.sync()
+	os.close(self.journalFile)
+	self.journalFile = os.open(self.journalFileName,
+		os.O_WRONLY|os.O_CREAT|os.O_TRUNC|os.O_SYNC, 0700)
+	self.journal = {}
 
     def close(self):
         """Closes this log."""
         self.sync()
         self.log.close()
+	os.close(self.journalFile)
+	
 

Index: benchmark.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/benchmark.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -d -r1.13 -r1.14
--- benchmark.py	2 Dec 2002 03:30:07 -0000	1.13
+++ benchmark.py	2 Dec 2002 20:18:44 -0000	1.14
@@ -288,6 +288,7 @@
 def _hashlogTiming(fname, load):
     from mixminion.HashLog import HashLog
 
+    # Try more realistic access patterns.
     prng = AESCounterPRNG("a"*16)
     
     print "Testing hash log (%s entries)"%load
@@ -299,6 +300,7 @@
     t = time()
     for n in xrange(len(hashes)):
         h.logHash(hashes[n])
+    h.sync()
     t = time()-t
     print "Add entry (up to %s entries)" %load, timestr(t/float(load))
 

Index: test.py
===================================================================
RCS file: /home/minion/cvsroot/src/minion/lib/mixminion/test.py,v
retrieving revision 1.40
retrieving revision 1.41
diff -u -d -r1.40 -r1.41
--- test.py	2 Dec 2002 10:13:49 -0000	1.40
+++ test.py	2 Dec 2002 20:18:44 -0000	1.41
@@ -805,47 +805,47 @@
         notseen("\000"*10)
         notseen("\000")
         notseen("\277"*10)
-        log("a")
+        log("a"*20)
         notseen("a*10")
         notseen("\000"*10)
         notseen("b")
-        seen("a")
+        seen("a"*20)
 
-        log("b")
-        seen("b")
-        seen("a")
+        log("b"*20)
+        seen("b"*20)
+        seen("a"*20)
 
-        log("\000")
-        seen("\000")
+        log("\000"*20)
+        seen("\000"*20)
         notseen("\000"*10)
 
-        log("\000"*10)
-        seen("\000"*10)
+        log("\000"*20)
+        seen("\000"*20)
 
         log("\277"*20)
         seen("\277"*20)
 
-        log("abcdef"*4)
-        seen("abcdef"*4)
+        log("abcde"*4)
+        seen("abcde"*4)
 
         h[0].close()
         h[0] = HashLog(fname, "Xyzzy")
-        seen("a")
-        seen("b")
+        seen("a"*20)
+        seen("b"*20)
         seen("\277"*20)
-        seen("abcdef"*4)
-        seen("\000")
-        seen("\000"*10)
+        seen("abcde"*4)
+        seen("\000"*20)
         notseen(" ")
         notseen("\000"*5)
+        notseen("\001"*20)
 
-        notseen("ddddd")
-        log("ddddd")
-        seen("ddddd")
+        notseen("ddddd"*4)
+        log("ddddd"*4)
+        seen("ddddd"*4)
 
         h[0].close()
         h[0] = HashLog(fname, "Xyzzy")
-        seen("ddddd")
+        seen("ddddd"*4)
 
         h[0].close()