[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[minion-cvs] Rewrite aes_ctr.c, since the openssl people believe some of our issues are nonbugs



Update of /home/minion/cvsroot/src/minion/src
In directory moria.seul.org:/tmp/cvs-serv12972

Modified Files:
	_minionlib.h aes_ctr.c crypt.c 
Log Message:
Rewrite aes_ctr.c, since the openssl people believe some of our issues
are nonbugs.  Optimize it a bit while there.


Index: _minionlib.h
===================================================================
RCS file: /home/minion/cvsroot/src/minion/src/_minionlib.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- _minionlib.h	29 May 2002 17:46:24 -0000	1.2
+++ _minionlib.h	31 May 2002 12:39:18 -0000	1.3
@@ -5,11 +5,13 @@
 
 #include <Python.h>
 #include <openssl/aes.h>
+#include <openssl/opensslv.h>
+#if (OPENSSL_VERSION_NUMBER < 0x00907000L)
+#error "Mixminion requires OpenSSL 0.9.7 (which might not have been released yet, but you can get snapshots from openssl.org)."
+#endif
 
-#define AESCRYPT mix_AES_ctr128_encrypt
-void mix_AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
-			    const unsigned long length, const AES_KEY *key,
-			    unsigned char *counter, unsigned int *num);
+void mm_aes_counter128(const char *in, char *out, unsigned int len, 
+		       AES_KEY *key, unsigned long count);
 
 #define FUNC(fn) PyObject* fn(PyObject *self, PyObject *args, PyObject *kwdict)
 #define DOC(fn) extern const char fn##__doc__[]

Index: aes_ctr.c
===================================================================
RCS file: /home/minion/cvsroot/src/minion/src/aes_ctr.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- aes_ctr.c	29 May 2002 03:52:13 -0000	1.1
+++ aes_ctr.c	31 May 2002 12:39:18 -0000	1.2
@@ -1,126 +1,111 @@
-/* crypto/aes/aes_ctr.c -*- mode:C; c-file-style: "eay" -*- */
-/* ====================================================================
- * Copyright (c) 1998-2002 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
+/* Copyright (c) 2002 Nick Mathewson.  See LICENSE for licensing information */
+/* $Id$ */
+
+/* This file reimplements counter mode.  The OpenSSL implementation is
+ * unsuitable because 
+ *          a) It wants to compute E(x << 64) instead of E(x), x=0,1,...
+ *          b) It can't begin in the middle of a stream.  (It can resume,
+ *             but that's not the same.)
+ *          c) It uses some awfully brute-forceish logic to increment
+ *             the counter.  Sure, that's not in the critical path,
+ *             but it still rankles.
  *
+ * Disclosure: I have seen and played with the OpenSSL implementation for
+ *   a while before I decided to abandon it.
  */
 
-#include <_minionlib.h>
+#include <openssl/aes.h>
+#include <alloca.h>
+#include <string.h>
+#include <stdio.h>
 
-/* Modified code for aes_ctr.c from openssl.  The difference is: this
- * code actually works.
- * 
- * I've submitted a bug report to the openssl people. -- NM 
- */
-/* For some weird reason, openssl wants to count by 2**64, and wants
- * to increment funny.  I've given up an rewritten the incr function too.
- * I'm leaving the above copyright notice to be safe. -- NM
- */
+typedef unsigned int u32;
+typedef unsigned char u8;
 
-#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
-#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
+/* ======================================================================
+   Endianness is ugly. */
 
-typedef unsigned long u32;
-typedef unsigned char u8;
+#undef GET_U32
+#undef SET_U32
 
-/* increment counter (128-bit big-endian int) by 1 */
-static void 
-incr(unsigned char *counter) {
-        unsigned long c;
+#ifdef MM_B_ENDIAN
+#define GET_U32(ptr) (*(ptr))
+#define SET_U32(ptr,i) (*(ptr)) = i 
+#endif
 
-	c = GETU32(counter+12);
-	c++;
-	PUTU32(counter+12,c);
-	if (c) return;
+#if 0  /* On my Athlon, bswap_32 is actually slower.  Surprisingly,
+	 the code in glib/gtypes.h _is_ faster; but shaves only 1%
+         off encryption.  We seem to have reached the point of diminishing
+         returns here.*/
+#ifdef MM_L_ENDIAN
+#ifdef MM_HAVE_BYTESWAP_H
+#include <byteswap.h>
+#define GET_U32(ptr) bswap_32(*(u32*)(ptr))
+#define SET_U32(ptr,i) (*(u32*)(ptr)) = bswap_32(i)
+#endif
+#endif
+#endif 
 
-	c = GETU32(counter+8);
-	c++;
-	PUTU32(counter+8,c);
-	if (c) return;
-		
 
-	c = GETU32(counter+4);
-	c++;
-	PUTU32(counter+8,c);
-	if (c) return;
+#ifndef GET_U32 
+#define GET_U32_cp(ptr) (  (u32)ptr[0] ^         \
+                         (((u32)ptr[1]) << 8) ^  \
+                         (((u32)ptr[2]) << 16) ^ \
+                         (((u32)ptr[3]) << 24))
+#define SET_U32_cp(ptr, i) { ptr[0] = (i)     & 0xff; \
+                             ptr[1] = (i>>8)  & 0xff; \
+                             ptr[2] = (i>>16) & 0xff; \
+                             ptr[3] = (i>>24) & 0xff; } 
+#define GET_U32(ptr)   GET_U32_cp(((u8*)(ptr)))
+#define SET_U32(ptr,i) SET_U32_cp(((u8*)(ptr)), i)
+#endif
 
-	c = GETU32(counter+0);
-	c++;
-	PUTU32(counter+8,c);
-	if (c) return;
-}
 
-/* The input encrypted as though 128bit counter mode is being
- * used.  The extra state information to record how much of the
- * 128bit block we have used is contained in *num;
- */
-void 
-mix_AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
-		       const unsigned long length, const AES_KEY *key,
-		       unsigned char *counter, unsigned int *num) {
-        unsigned int n;
-        unsigned long l=length;
-        unsigned char tmp[AES_BLOCK_SIZE];
+static inline void
+mm_incr(u32 const* ctr32)
+{
+	u32 i;
 
-        assert(in && out && key && counter && num);
-        n = *num;
+	i = GET_U32(ctr32+3) + 1;
+	SET_U32(ctr32+3, i);
+	if (i) return;
+	
+	i = GET_U32(ctr32+2) + 1;
+	SET_U32(ctr32+2, i);
+	if (i) return;
+	
+	i = GET_U32(ctr32+1) + 1;
+	SET_U32(ctr32+1, i);
+	if (i) return;
+			
+	i = GET_U32(ctr32) + 1;
+	SET_U32(ctr32, i);
+}
 
-	AES_encrypt(counter,tmp,key);
+void
+mm_aes_counter128(const char *in, char *out, unsigned int len, AES_KEY *key,
+                  unsigned long count) 
+{
+	unsigned char counter[16];
+	unsigned char tmp[16];
+	/* making this a variable can hurt register pressure, and we'd
+	   really like the compiler to be able to inline mm_incr above. */
+	#define CTR32 ((u32*)counter)
 
-        while (l--) {
-                *(out++) = *(in++) ^ tmp[n];
-                n = (n+1) % AES_BLOCK_SIZE;
-		if (n == 0 && l) {
-                        incr(counter);
-                        AES_encrypt(counter, tmp, key);
-                }			
-        }
+	if (!len) return;
+	memset(counter, 0, 12);
+	SET_U32(CTR32+3, count >> 4);
+	count &= 0x0f;
 
-        *num=n;
+	while (1) {
+		AES_encrypt(counter, tmp, key);
+		do {
+			*(out++) = *(in++) ^ tmp[count];
+			if (--len == 0) return; 
+		} while (++count != 16);
+		mm_incr(CTR32);
+		count = 0;
+	}
 }
 
 /*
@@ -129,3 +114,4 @@
   c-basic-offset:8
   End:
 */
+

Index: crypt.c
===================================================================
RCS file: /home/minion/cvsroot/src/minion/src/crypt.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- crypt.c	29 May 2002 17:46:24 -0000	1.2
+++ crypt.c	31 May 2002 12:39:18 -0000	1.3
@@ -127,9 +127,9 @@
   "at idx.  If prng is nonzero, ignores string and just produces a stream of\n"
   "length prng.\n\n"
   "BUG: only the 32 least significant bits of idx are used.\n\n"
-  "Performance notes:  PRNG mode is much faster (33% @ 32K) than generating\n"
+  "Performance notes:  PRNG mode is much faster (36% @ 32K) than generating\n"
   "a string of NULs in Python and encrypting it.  Encryption, on the other\n"
-  "hand, is only slightly faster (11% @ 32K) than XORing the prng output\n"
+  "hand, is only slightly faster (15% @ 32K) than XORing the prng output\n"
   "with the plaintext.\n";
 
 PyObject*
@@ -139,10 +139,8 @@
 	unsigned char *input;
         int inputlen, prng=0;
 	long idx=0;
-	int shortidx;
-	AES_KEY *aes_key =NULL;
+	AES_KEY *aes_key = NULL;
 
-	unsigned char *counter;
 	PyObject *output;
 
 	if (!PyArg_ParseTupleAndKeywords(args, kwdict, 
@@ -155,18 +153,6 @@
 	if (idx < 0) idx = 0;
 	if (prng < 0) prng = 0;
 
-	shortidx = idx & 0x0f;
-	idx >>= 4;
-	counter = malloc(AES_BLOCK_SIZE);
-	if (!counter) { PyErr_NoMemory(); return NULL; }
-		
-	memset(counter, 0, AES_BLOCK_SIZE);
-	if (idx != 0) {
-		counter[15] =  idx        & 0xff;
-		counter[14] = (idx >> 8)  & 0xff;
-		counter[13] = (idx >> 16) & 0xff;
-		counter[12] = (idx >> 24) & 0xff;
-	}
 	if (prng) { 
 		inputlen = prng;
 		input = malloc(prng);
@@ -176,16 +162,12 @@
 	output = PyString_FromStringAndSize(NULL, inputlen);
 	if (!output) {
 		PyErr_NoMemory(); 
-		free(counter); 
 		if (prng) free(input);
 		return NULL;
 	}
 
-	AESCRYPT((const char*)input, PyString_AS_STRING(output),
-		 inputlen, aes_key,
-		 counter, &shortidx);
-
-	free(counter);
+	mm_aes_counter128(input, PyString_AS_STRING(output), inputlen,
+			  aes_key, idx); 
 
 	if (prng) free(input);
 	return output;
@@ -634,6 +616,11 @@
 				  &input,&inputlen,&param,&paramlen,&keylen))
 		return NULL;
 
+	/**
+	 * XXXX Why is this test (along with the input+1 below) necessary?
+	 * XXXX I'd be happier if I knew, and I'd take out the bit about
+	 * XXXX our assumptions being gravely mistaken. :)
+	 **/
 	if (inputlen == 0 || *input != '\000') {
 		PyErr_SetString(mm_SSLError,
 				"Bad padding, or our assumptions about "