[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[or-cvs] r12983: Here, have some terribly clever new buffer code. It uses a m (in tor/trunk: . src/or)



Author: nickm
Date: 2007-12-25 19:12:08 -0500 (Tue, 25 Dec 2007)
New Revision: 12983

Modified:
   tor/trunk/
   tor/trunk/ChangeLog
   tor/trunk/src/or/buffers.c
   tor/trunk/src/or/connection.c
   tor/trunk/src/or/main.c
   tor/trunk/src/or/or.h
   tor/trunk/src/or/test.c
Log:
 r15693@tombo:  nickm | 2007-12-25 19:11:29 -0500
 Here, have some terribly clever new buffer code.  It uses an mbuf-like strategy rather than a ring-buffer strategy, so it should require far, far less extra memory to hold any given amount of data.  Also, it avoids access patterns like x=malloc(1024);x=realloc(x,1048576);x=realloc(x,1024);append_to_freelist(x) that might have been contributing to memory fragmentation.  I've tested it out a little on peacetime, and it seems to work so far.  If you want to benchmark it for speed, make sure to remove the "#define PARANOIA" and "#define NOINLINE" macros at the head of the module.



Property changes on: tor/trunk
___________________________________________________________________
 svk:merge ticket from /tor/trunk [r15693] on d9e39d38-0f13-419c-a857-e10a0ce2aa0c

Modified: tor/trunk/ChangeLog
===================================================================
--- tor/trunk/ChangeLog	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/ChangeLog	2007-12-26 00:12:08 UTC (rev 12983)
@@ -1,3 +1,11 @@
+Changes in version 0.2.0.16-alpha - 2008-01-??
+  o Major performance improvements:
+    - Switch our old ring buffer implementation for one more like that
+      used by free Unix kernels.  The wasted space in a buffer with
+      1MB of data will now be more like 8KB than 1MB.  The new
+      implementation also avoids realloc();realloc(); patterns that
+      can contribute to memory fragmentation.
+
 Changes in version 0.2.0.15-alpha - 2007-12-25
   o Major bugfixes:
     - Fix several remotely triggerable asserts based on DirPort requests

Modified: tor/trunk/src/or/buffers.c
===================================================================
--- tor/trunk/src/or/buffers.c	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/src/or/buffers.c	2007-12-26 00:12:08 UTC (rev 12983)
@@ -16,42 +16,9 @@
 
 #include "or.h"
 
-#undef SENTINELS
-#undef CHECK_AFTER_RESIZE
-#undef PARANOIA
-#undef NOINLINE
+#define PARANOIA
+#define NOINLINE
 
-/* If SENTINELS is defined, check for attempts to write beyond the
- * end/before the start of the buffer.
- */
-#ifdef SENTINELS
-/** 4-byte value to write at the start of each buffer memory region. */
-#define START_MAGIC 0x70370370u
-/** 4-byte value to write at the end of each buffer memory region. */
-#define END_MAGIC 0xA0B0C0D0u
-/** Given buf->mem, yield a pointer to the raw memory region (for free(),
- * realloc(), and so on). */
-#define RAW_MEM(m) ((void*)(((char*)m)-4))
-/** Given a pointer to the raw memory region (from malloc() or realloc()),
- * yield the correct value for buf->mem (just past the first sentinel). */
-#define GUARDED_MEM(m) ((void*)(((char*)m)+4))
-/** How much memory do we need to allocate for a buffer to hold <b>ln</b> bytes
- * of data? */
-#define ALLOC_LEN(ln) ((ln)+8)
-/** Initialize the sentinel values on <b>m</b> (a value of buf-&gt;mem), which
- * has <b>ln</b> useful bytes. */
-#define SET_GUARDS(m, ln) \
-  STMT_BEGIN                         \
-    set_uint32((m)-4,START_MAGIC);   \
-    set_uint32((m)+ln,END_MAGIC);    \
-  STMT_END
-#else
-#define RAW_MEM(m) (m)
-#define GUARDED_MEM(m) (m)
-#define ALLOC_LEN(ln) (ln)
-#define SET_GUARDS(m,ln) STMT_NIL
-#endif
-
 #ifdef PARANOIA
 #define check() STMT_BEGIN assert_buf_ok(buf); STMT_END
 #else
@@ -63,383 +30,331 @@
 #define INLINE
 #endif
 
-/** Magic value for buf_t.magic, to catch pointer errors. */
-#define BUFFER_MAGIC 0xB0FFF312u
-/** A resizeable buffer, optimized for reading and writing. */
-struct buf_t {
-  uint32_t magic; /**< Magic cookie for debugging: Must be set to
-                   *   BUFFER_MAGIC. */
-  char *mem;      /**< Storage for data in the buffer. */
-  char *cur;      /**< The first byte used for storing data in the buffer. */
-  size_t highwater; /**< Largest observed datalen since last buf_shrink. */
-  size_t len;     /**< Maximum amount of data that <b>mem</b> can hold. */
-  size_t memsize; /**< How many bytes did we actually allocate? Can be less
-                   * than 'len' if we shortened 'len' by a few bytes to make
-                   * zlib wrap around more easily. */
-  size_t datalen; /**< Number of bytes currently in <b>mem</b>. */
-};
+/* Implementation notes:
+ *
+ * After flirting with memmove, and dallying with ring-buffers, we're finally
+ * getting up to speed with the 1970s and implementing buffers as a linked
+ * list of small chunks.  Each buffer has such a list; data is removed from
+ * the head of the list, and added at the tail.  The list is singly linked,
+ * and the buffer keeps a pointer to the head and the tail.
+ *
+ * Every chunk, except the tail, contains at least one byte of data.  Data in
+ * each chunk is contiguous.
+ *
+ * When you need to treat the first N characters on a buffer as a contiguous
+ * string, use the buf_pullup function to make them so.  Don't do this more
+ * than necessary.
+ *
+ * The major free Unix kernels have handled buffers like this since, like,
+ * forever.
+ */
 
-/** Size, in bytes, for newly allocated buffers.  Should be a power of 2. */
-#define INITIAL_BUF_SIZE (4*1024)
-/** Size, in bytes, for minimum 'shrink' size for buffers.  Buffers may start
- * out smaller than this, but they will never autoshrink to less
- * than this size. */
-#define MIN_LAZY_SHRINK_SIZE (4*1024)
+/* Chunk manipulation functions */
 
-static INLINE void peek_from_buf(char *string, size_t string_len, buf_t *buf);
+/** A single chunk on a buffer or in a freelist. */
+typedef struct chunk_t {
+  struct chunk_t *next; /**< The next chunk on the buffer or freelist. */
+  size_t datalen; /**< The number of bytes stored in this chunk */
+  size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */
+  char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */
+  char mem[1]; /**< The actual memory used for storage in this chunk. May be
+                * more than one byte long. */
+} chunk_t;
 
-/** If the contents of buf wrap around the end of the allocated space,
- * malloc a new buf and copy the contents in starting at the
- * beginning. This operation is relatively expensive, so it shouldn't
- * be used e.g. for every single read or write.
- */
-static void
-buf_normalize(buf_t *buf)
-{
-  check();
-  if (buf->cur + buf->datalen <= buf->mem+buf->len) {
-    return;
-  } else {
-    char *newmem, *oldmem;
-    size_t sz = (buf->mem+buf->len)-buf->cur;
-    log_warn(LD_BUG, "Unexpected non-normalized buffer.");
-    newmem = GUARDED_MEM(tor_malloc(ALLOC_LEN(buf->memsize)));
-    SET_GUARDS(newmem, buf->memsize);
-    memcpy(newmem, buf->cur, sz);
-    memcpy(newmem+sz, buf->mem, buf->datalen-sz);
-    oldmem = RAW_MEM(buf->mem);
-    tor_free(oldmem); /* Can't use tor_free directly. */
-    buf->mem = buf->cur = newmem;
-    buf->len = buf->memsize;
-    check();
-  }
-}
+/** Return the number of bytes needed to allocate a chunk to hold
+ * <b>memlen</b> bytes. */
+#define CHUNK_ALLOC_SIZE(memlen) (sizeof(chunk_t) + (memlen) - 1)
+/** Return the number of usable bytes in a chunk allocated with
+ * malloc(<b>memlen</b>). */
+#define CHUNK_SIZE_WITH_ALLOC(memlen) ((memlen) - sizeof(chunk_t) + 1)
 
-/** Return the point in the buffer where the next byte will get stored. */
+/** Return the next character in <b>chunk</b> onto which data can be appended.
+ * If the chunk is full, this might be off the end of chunk->mem. */
 static INLINE char *
-_buf_end(buf_t *buf)
+CHUNK_WRITE_PTR(chunk_t *chunk)
 {
-  char *next = buf->cur + buf->datalen;
-  char *end = buf->mem + buf->len;
-  return (next < end) ? next : (next - buf->len);
+  return chunk->data + chunk->datalen;
 }
 
-/** If the pointer <b>cp</b> has passed beyond the end of the buffer, wrap it
- * around. */
-static INLINE char *
-_wrap_ptr(buf_t *buf, char *cp)
+/** Return the number of bytes that can be written onto <b>chunk</b> without
+ * running out of space. */
+static INLINE size_t
+CHUNK_REMAINING_CAPACITY(const chunk_t *chunk)
 {
-  return (cp >= buf->mem + buf->len) ? (cp - buf->len) : cp;
+  return (chunk->mem + chunk->memlen) - (chunk->data + chunk->datalen);
 }
 
-/** Return the offset of <b>cp</b> within the buffer. */
-static INLINE int
-_buf_offset(buf_t *buf, char *cp)
-{
-  if (cp >= buf->cur)
-    return cp - buf->cur;
-  else
-    /* return (cp - buf->mem) + buf->mem+buf->len - buf->cur */
-    return cp + buf->len - buf->cur;
-}
-
-/** If the range of *<b>len</b> bytes starting at <b>at</b> wraps around the
- * end of the buffer, then set *<b>len</b> to the number of bytes starting
- * at <b>at</b>, and set *<b>more_len</b> to the number of bytes starting
- * at <b>buf-&gt;mem</b>.  Otherwise, set *<b>more_len</b> to 0.
- */
+/** Move all bytes stored in <b>chunk</b> to the front of <b>chunk</b>->mem,
+ * to free up space at the end. */
 static INLINE void
-_split_range(buf_t *buf, char *at, size_t *len,
-                                size_t *more_len)
+chunk_repack(chunk_t *chunk)
 {
-  char *eos = at + *len;
-  check();
-  if (eos >= (buf->mem + buf->len)) {
-    *more_len = eos - (buf->mem + buf->len);
-    *len -= *more_len;
-  } else {
-    *more_len = 0;
+  if (chunk->datalen && chunk->data != &chunk->mem[0]) {
+    memmove(chunk->mem, chunk->data, chunk->datalen);
   }
+  chunk->data = &chunk->mem[0];
 }
 
-/** A freelist of buffer RAM chunks. */
-typedef struct free_mem_list_t {
-  char *list; /**< The first item on the list; begins with pointer to the
-               * next item. */
-  int len; /**< How many entries in <b>list</b>. */
-  int lowwater; /**< The smallest that list has gotten since the last call to
-                 * buf_shrink_freelists(). */
-  const size_t chunksize; /**< How big are the items on the list? */
-  const int slack; /**< We always keep at least this many items on the list
-                    * when shrinking it. */
-  const int max; /**< How many elements are we willing to throw onto the list?
-                  */
-} free_mem_list_t;
+/** A freelist of chunks. */
+typedef struct chunk_freelist_t {
+  size_t alloc_size; /**< What size chunks does this freelist hold? */
+  int max_length; /**< Never allow more than this number of chunks in the
+                   * freelist. */
+  int slack; /**< When trimming the freelist, leave this number of extra
+              * chunks beyond lowest_length.*/
+  int cur_length; /**< How many chunks on the freelist now? */
+  int lowest_length; /**< What's the smallest value of cur_length since the
+                      * last time we cleaned this freelist? */
+  chunk_t *head; /**< First chunk on the freelist. */
+} chunk_freelist_t;
 
-/** Freelists to hold 4k and 16k memory chunks.  This seems to be what
- * we use most. */
-static free_mem_list_t free_mem_list_4k = { NULL, 0, 0, 4096, 16, INT_MAX };
-static free_mem_list_t free_mem_list_8k = { NULL, 0, 0, 8192 , 8, 128 };
-static free_mem_list_t free_mem_list_16k = { NULL, 0, 0, 16384, 4, 64 };
+/** Macro to help define freelists. */
+#define FL(a,m,s) { a, m, s, 0, 0, NULL }
 
-/** Macro: True iff the size is one for which we keep a freelist. */
-#define IS_FREELIST_SIZE(sz) ((sz) == 4096 || (sz) == 8192 || (sz) == 16384)
+/** Static array of freelists, sorted by alloc_size, terminated by an entry
+ * with alloc_size of 0. */
+static chunk_freelist_t freelists[] = {
+  FL(256, 1024, 16), FL(512, 1024, 16), FL(1024, 512, 8), FL(4096, 256, 8),
+  FL(8192, 128, 4), FL(16384, 64, 4), FL(0, 0, 0)
+};
+#undef FL
 
-/** Return the proper freelist for chunks of size <b>sz</b>, or fail
- * with an assertion. */
-static INLINE free_mem_list_t *
-get_free_mem_list(size_t sz)
+static void assert_freelist_ok(chunk_freelist_t *fl);
+
+/** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if
+ * no freelist exists for that size. */
+static INLINE chunk_freelist_t *
+get_freelist(size_t alloc)
 {
-  if (sz == 4096) {
-    return &free_mem_list_4k;
-  } else if (sz == 8192) {
-    return &free_mem_list_8k;
-  } else {
-    tor_assert(sz == 16384);
-    return &free_mem_list_16k;
+  int i;
+  for (i=0; freelists[i].alloc_size <= alloc; ++i) {
+    if (freelists[i].alloc_size == alloc) {
+      return &freelists[i];
+    }
   }
+  return NULL;
 }
 
-/** Write the sizes of the buffer freelists at log level <b>severity</b> */
-void
-buf_dump_freelist_sizes(int severity)
+/** Deallocate a chunk or put it on a freelist */
+static void
+chunk_free(chunk_t *chunk)
 {
-  size_t sz;
-  log(severity, LD_MM, "======= Buffer freelists.");
-  for (sz = 4096; sz <= 16384; sz *= 2) {
-    uint64_t total_size;
-    free_mem_list_t *lst;
-    if (!IS_FREELIST_SIZE(sz))
-      continue;
-    lst = get_free_mem_list(sz);
-    total_size = ((uint64_t)sz)*lst->len;
-    log(severity, LD_MM,
-        U64_FORMAT" bytes in %d %d-byte buffers. (low-water: %d)",
-        U64_PRINTF_ARG(total_size), lst->len, (int)sz, lst->lowwater);
+  size_t alloc = CHUNK_ALLOC_SIZE(chunk->memlen);
+  chunk_freelist_t *freelist = get_freelist(alloc);
+  if (freelist && freelist->cur_length < freelist->max_length) {
+    chunk->next = freelist->head;
+    freelist->head = chunk;
+    ++freelist->cur_length;
+  } else {
+    tor_free(chunk);
   }
 }
 
-/** Throw the memory from <b>buf</b> onto the appropriate freelist.
- * Return true if we added the memory, 0 if the freelist was full. */
-static int
-add_buf_mem_to_freelist(buf_t *buf)
+/** Allocate a new chunk with a given allocation size, or get one from the
+ * freelist.  Note that a chunk with allocation size A can actually hold only
+ * CHUNK_SIZE_WITH_ALLOC(A) bytes in its mem field. */
+static INLINE chunk_t *
+chunk_new_with_alloc_size(size_t alloc)
 {
-  char *mem;
-  free_mem_list_t *list;
+  chunk_t *ch;
+  chunk_freelist_t *freelist;
+  tor_assert(alloc >= sizeof(chunk_t));
+  freelist = get_freelist(alloc);
+  if (freelist && freelist->head) {
+    ch = freelist->head;
+    freelist->head = ch->next;
+    if (--freelist->cur_length < freelist->lowest_length)
+      freelist->lowest_length = freelist->cur_length;
+  } else {
+    /* XXXX020 take advantage of tor_malloc_roundup. */
+    ch = tor_malloc(alloc);
+  }
+  ch->next = NULL;
+  ch->datalen = 0;
+  ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
+  ch->data = &ch->mem[0];
+  return ch;
+}
 
-  tor_assert(buf->datalen == 0);
-  tor_assert(buf->mem);
-  list = get_free_mem_list(buf->len);
+/** Allocate a new chunk with memory size of <b>sz</b>. */
+#define chunk_new_with_capacity(sz) \
+  (chunk_new_with_alloc_size(CHUNK_ALLOC_SIZE(sz)))
 
-  if (list->len >= list->max)
-    return 0;
-
-  mem = RAW_MEM(buf->mem);
-  buf->len = buf->memsize = 0;
-  buf->mem = buf->cur = NULL;
-
-  *(char**)mem = list->list;
-  list->list = mem;
-  ++list->len;
-  log_debug(LD_GENERAL, "Add buf mem to %d-byte freelist.  Freelist has "
-            "%d entries.", (int)list->chunksize, list->len);
-
-  return 1;
+/** Expand <b>chunk</b> until it can hold <b>sz</b> bytes, and return a
+ * new pointer to <b>chunk</b>.  Old pointers are no longer valid. */
+static INLINE chunk_t *
+chunk_grow(chunk_t *chunk, size_t sz)
+{
+  off_t offset;
+  tor_assert(sz > chunk->memlen);
+  offset = chunk->data - chunk->mem;
+  chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz));
+  chunk->memlen = sz;
+  chunk->data = chunk->mem + offset;
+  return chunk;
 }
 
-/** Pull memory of size <b>sz</b> from the appropriate freelist for use by
- * <b>buf</b>, or allocate it as needed. */
-static void
-buf_get_initial_mem(buf_t *buf, size_t sz)
-{
-  char *mem;
-  free_mem_list_t *list = get_free_mem_list(sz);
-  tor_assert(!buf->mem);
+/** If a read onto the end of a chunk would be smaller than this number, then
+ * just start a new chunk. */
+#define MIN_READ_LEN 8
+/** Every chunk should take up at least this many bytes. */
+#define MIN_CHUNK_ALLOC 256
+/*XXXX020 enforce this maximum. */
+#define MAX_CHUNK_ALLOC 65536
 
-  if (list->list) {
-    mem = list->list;
-    list->list = *(char**)mem;
-    if (--list->len < list->lowwater)
-      list->lowwater = list->len;
-    log_debug(LD_GENERAL, "Got buf mem from %d-byte freelist. Freelist has "
-             "%d entries.", (int)list->chunksize, list->len);
-  } else {
-    log_debug(LD_GENERAL, "%d-byte freelist empty; allocating another chunk.",
-             (int)list->chunksize);
-    tor_assert(list->len == 0);
-    mem = tor_malloc(ALLOC_LEN(sz));
+/** Return the allocation size we'd like to use to hold <b>target</b>
+ * bytes. */
+static INLINE size_t
+preferred_chunk_size(size_t target)
+{
+  /* XXXX020 use log2 code, maybe. */
+  size_t sz = MIN_CHUNK_ALLOC;
+  while (CHUNK_SIZE_WITH_ALLOC(sz) < target) {
+    sz <<= 1;
   }
-  buf->mem = GUARDED_MEM(mem);
-  SET_GUARDS(buf->mem, sz);
-  buf->len = sz;
-  buf->memsize = ALLOC_LEN(sz);
-  buf->cur = buf->mem;
+  return sz;
 }
 
-/** Remove elements from the freelists that haven't been needed since the
- * last call to this function. If <b>free_all</b>, we're exiting and we
- * should clear the whole lists. */
+/** Remove from the freelists most chunks that have not been used since the
+ * last call to buf_shrink_freelists().  If <b>free_all</b>, free them all. */
 void
 buf_shrink_freelists(int free_all)
 {
-  int list_elt_size;
-  for (list_elt_size = 4096; list_elt_size <= 16384; list_elt_size *= 2) {
-    free_mem_list_t *list = get_free_mem_list(list_elt_size);
-    if (list->lowwater > list->slack || free_all) {
-      int i, n_to_skip, n_to_free;
-      char **ptr;
-      if (free_all) { /* Free every one of them */
-        log_debug(LD_GENERAL, "Freeing all %d elements from %d-byte freelist.",
-                  list->len, (int)list->chunksize);
-        n_to_free = list->len;
-      } else { /* Skip over the slack and non-lowwater entries */
-        log_debug(LD_GENERAL, "We haven't used %d/%d allocated %d-byte buffer "
-                  "memory chunks since the last call; freeing all but %d of "
-                  "them",
-               list->lowwater, list->len, (int)list->chunksize, list->slack);
-        n_to_free = list->lowwater - list->slack;
+  int i;
+  for (i = 0; freelists[i].alloc_size; ++i) {
+    int slack = freelists[i].slack;
+    assert_freelist_ok(&freelists[i]);
+    if (free_all || freelists[i].lowest_length > slack) {
+      int n_to_free = free_all ? freelists[i].cur_length :
+        (freelists[i].lowest_length - slack);
+      int n_to_skip = freelists[i].cur_length - n_to_free;
+      chunk_t **chp = &freelists[i].head;
+      chunk_t *chunk;
+      while (n_to_skip) {
+        tor_assert((*chp)->next);
+        chp = &(*chp)->next;
       }
-      n_to_skip = list->len - n_to_free;
-      for (ptr = &list->list, i = 0; i < n_to_skip; ++i) {
-        char *mem = *ptr;
-        tor_assert(mem);
-        ptr = (char**)mem;
+      chunk = *chp;
+      *chp = NULL;
+      while (chunk) {
+        chunk_t *next = chunk->next;
+        tor_free(chunk);
+        chunk = next;
+        --n_to_free;
       }
-      /* And free the remaining entries. */
-      for (i = 0; i < n_to_free; ++i) {
-        char *mem = *ptr;
-        tor_assert(mem);
-        *ptr = *(char**)mem;
-        tor_free(mem);
-        --list->len;
-      }
+      tor_assert(!n_to_free);
+      freelists[i].lowest_length = freelists[i].cur_length = n_to_skip;
     }
-    list->lowwater = list->len;
+    assert_freelist_ok(&freelists[i]);
   }
 }
 
-/** Change a buffer's capacity. <b>new_capacity</b> must be \>=
- * buf->datalen. */
-static void
-buf_resize(buf_t *buf, size_t new_capacity)
+/** Describe the current status of the freelists at log level <b>severity</b>.
+ */
+void
+buf_dump_freelist_sizes(int severity)
 {
-  off_t offset;
-#ifdef CHECK_AFTER_RESIZE
-  char *tmp, *tmp2;
-#endif
-  tor_assert(buf->datalen <= new_capacity);
-  tor_assert(new_capacity);
+  int i;
+  log(severity, LD_MM, "====== Buffer freelists:");
+  for (i = 0; freelists[i].alloc_size; ++i) {
+    uint64_t total = ((uint64_t)freelists[i].cur_length) *
+      freelists[i].alloc_size;
+    log(severity, LD_MM,
+        U64_FORMAT" bytes in %d %d-byte chunks", U64_PRINTF_ARG(total),
+        freelists[i].cur_length, (int)freelists[i].alloc_size);
+  }
+}
 
-#ifdef CHECK_AFTER_RESIZE
-  assert_buf_ok(buf);
-  tmp = tor_malloc(buf->datalen);
-  tmp2 = tor_malloc(buf->datalen);
-  peek_from_buf(tmp, buf->datalen, buf);
-#endif
+/** Magic value for buf_t.magic, to catch pointer errors. */
+#define BUFFER_MAGIC 0xB0FFF312u
+/** A resizeable buffer, optimized for reading and writing. */
+struct buf_t {
+  uint32_t magic; /**< Magic cookie for debugging: Must be set to
+                   *   BUFFER_MAGIC. */
+  size_t datalen; /**< How many bytes is this buffer holding right now? */
+  size_t default_chunk_size; /**< Don't allocate any chunks smaller than
+                              * this for this buffer. */
+  chunk_t *head; /**< First chunk in the list, or NULL for none. */
+  chunk_t *tail; /**< Last chunk in the list, or NULL for none. */
+};
 
-  if (buf->len == new_capacity)
+/** Collapse data from the first N chunks from <b>buf</b> into buf->head,
+ * growing it as necessary, until buf->head has the first <b>bytes</b> bytes
+ * of data from the buffer, or until buf->head has all the data in <b>buf</b>.
+ *
+ * If <b>nulterminate</b> is true, ensure that there is a 0 byte in
+ * buf->head->mem right after all the data. */
+static void
+buf_pullup(buf_t *buf, size_t bytes, int nulterminate)
+{
+  chunk_t *dest, *src;
+  size_t capacity;
+  if (!buf->head)
     return;
 
-  offset = buf->cur - buf->mem;
-  if (offset + buf->datalen > new_capacity) {
-    /* We need to move stuff before we shrink. */
-    if (offset + buf->datalen > buf->len) {
-      /* We have:
-       *
-       * mem[0] ... mem[datalen-(len-offset)] (end of data)
-       * mem[offset] ... mem[len-1]           (the start of the data)
-       *
-       * We're shrinking the buffer by (len-new_capacity) bytes, so we need
-       * to move the start portion back by that many bytes.
-       */
-      memmove(buf->cur-(buf->len-new_capacity), buf->cur,
-              (size_t)(buf->len-offset));
-      offset -= (buf->len-new_capacity);
-    } else {
-      /* The data doesn't wrap around, but it does extend beyond the new
-       * buffer length:
-       *   mem[offset] ... mem[offset+datalen-1] (the data)
-       */
-      memmove(buf->mem, buf->cur, buf->datalen);
-      offset = 0;
+  check();
+  if (buf->datalen < bytes)
+    bytes = buf->datalen;
+
+  if (nulterminate) {
+    capacity = bytes + 1;
+    if (buf->head->datalen >= bytes && CHUNK_REMAINING_CAPACITY(buf->head)) {
+      *CHUNK_WRITE_PTR(buf->head) = '\0';
+      return;
     }
+  } else {
+    capacity = bytes;
+    if (buf->head->datalen >= bytes)
+      return;
   }
 
-  if (buf->len == 0 && new_capacity < MIN_LAZY_SHRINK_SIZE)
-    new_capacity = MIN_LAZY_SHRINK_SIZE;
-
-  if (buf->len == 0 && IS_FREELIST_SIZE(new_capacity)) {
-    tor_assert(!buf->mem);
-    buf_get_initial_mem(buf, new_capacity);
+  if (buf->head->memlen >= capacity) {
+    /* We don't need to grow the first chunk, but we might need to repack it.*/
+    if (CHUNK_REMAINING_CAPACITY(buf->head) < capacity-buf->datalen)
+      chunk_repack(buf->head);
+    tor_assert(CHUNK_REMAINING_CAPACITY(buf->head) >= capacity-buf->datalen);
   } else {
-    char *raw;
-    if (buf->mem)
-      raw = tor_realloc(RAW_MEM(buf->mem), ALLOC_LEN(new_capacity));
-    else {
-      log_debug(LD_GENERAL, "Jumping straight from 0 bytes to %d",
-               (int)new_capacity);
-      raw = tor_malloc(ALLOC_LEN(new_capacity));
+    chunk_t *newhead;
+    size_t newsize;
+    /* We need to grow the chunk. */
+    chunk_repack(buf->head);
+    newsize = CHUNK_SIZE_WITH_ALLOC(preferred_chunk_size(capacity));
+    newhead = chunk_grow(buf->head, newsize);
+    tor_assert(newhead->memlen >= capacity);
+    if (newhead != buf->head) {
+      if (buf->tail == buf->head)
+        buf->tail = newhead;
+      buf->head = newhead;
     }
-    buf->mem = GUARDED_MEM(raw);
-    SET_GUARDS(buf->mem, new_capacity);
-    buf->cur = buf->mem+offset;
   }
 
-  if (offset + buf->datalen > buf->len) {
-    /* We need to move data now that we are done growing.  The buffer
-     * now contains:
-     *
-     * mem[0] ... mem[datalen-(len-offset)] (end of data)
-     * mem[offset] ... mem[len-1]           (the start of the data)
-     * mem[len]...mem[new_capacity]         (empty space)
-     *
-     * We're growing by (new_capacity-len) bytes, so we need to move the
-     * end portion forward by that many bytes.
-     */
-    memmove(buf->cur+(new_capacity-buf->len), buf->cur,
-            (size_t)(buf->len-offset));
-    buf->cur += new_capacity-buf->len;
+  dest = buf->head;
+  while (dest->datalen < bytes) {
+    size_t n = bytes - dest->datalen;
+    src = dest->next;
+    tor_assert(src);
+    if (n > src->datalen) {
+      memcpy(CHUNK_WRITE_PTR(dest), src->data, src->datalen);
+      dest->datalen += src->datalen;
+      dest->next = src->next;
+      if (buf->tail == src)
+        buf->tail = dest;
+      chunk_free(src);
+    } else {
+      memcpy(CHUNK_WRITE_PTR(dest), src->data, n);
+      dest->datalen += n;
+      src->data += n;
+      src->datalen -= n;
+      tor_assert(dest->datalen == bytes);
+    }
   }
-  buf->len = new_capacity;
-  buf->memsize = ALLOC_LEN(buf->len);
 
-#ifdef CHECK_AFTER_RESIZE
-  assert_buf_ok(buf);
-  peek_from_buf(tmp2, buf->datalen, buf);
-  if (memcmp(tmp, tmp2, buf->datalen)) {
-    tor_assert(0);
+  if (nulterminate) {
+    tor_assert(CHUNK_REMAINING_CAPACITY(buf->head));
+    *CHUNK_WRITE_PTR(buf->head) = '\0';
   }
-  tor_free(tmp);
-  tor_free(tmp2);
-#endif
-}
 
-/** If the buffer is not large enough to hold <b>capacity</b> bytes, resize
- * it so that it can.  (The new size will be a power of 2 times the old
- * size.)
- */
-static INLINE int
-buf_ensure_capacity(buf_t *buf, size_t capacity)
-{
-  size_t new_len, min_len;
-  if (buf->len >= capacity)  /* Don't grow if we're already big enough. */
-    return 0;
-  if (capacity > MAX_BUF_SIZE) /* Don't grow past the maximum. */
-    return -1;
-  /* Find the smallest new_len equal to (2**X) for some X; such that
-   * new_len is at least capacity, and at least 2*buf->len.
-   */
-  min_len = buf->len*2;
-  new_len = 16;
-  while (new_len < min_len)
-    new_len *= 2;
-  while (new_len < capacity)
-    new_len *= 2;
-  /* Resize the buffer. */
-  log_debug(LD_MM,"Growing buffer from %d to %d bytes.",
-            (int)buf->len, (int)new_len);
-  buf_resize(buf,new_len);
-  return 0;
+  check();
 }
 
 /** Resize buf so it won't hold extra memory that we haven't been
@@ -451,32 +366,7 @@
 void
 buf_shrink(buf_t *buf)
 {
-  size_t new_len;
-
-  new_len = buf->len;
-  /* Actually, we ignore highwater here if we're going to throw it on the
-   * freelist, since it's way cheaper to use the freelist than to use (some)
-   * platform mallocs.
-   *
-   * DOCDOC If it turns out to be a good idea, add it to the doxygen for this
-   * function.
-   */
-  if (buf->datalen == 0 && // buf->highwater == 0 &&
-      IS_FREELIST_SIZE(buf->len)) {
-    buf->highwater = 0;
-    if (add_buf_mem_to_freelist(buf))
-      return;
-  }
-  while (buf->highwater < (new_len>>2) && new_len > MIN_LAZY_SHRINK_SIZE*2)
-    new_len >>= 1;
-
-  buf->highwater = buf->datalen;
-  if (new_len == buf->len)
-    return;
-
-  log_debug(LD_MM,"Shrinking buffer from %d to %d bytes.",
-            (int)buf->len, (int)new_len);
-  buf_resize(buf, new_len);
+  (void)buf;
 }
 
 /** Remove the first <b>n</b> bytes from buf. */
@@ -484,66 +374,57 @@
 buf_remove_from_front(buf_t *buf, size_t n)
 {
   tor_assert(buf->datalen >= n);
-  buf->datalen -= n;
-  if (buf->datalen) {
-    buf->cur = _wrap_ptr(buf, buf->cur+n);
-  } else {
-    buf->cur = buf->mem;
-    if (IS_FREELIST_SIZE(buf->len)) {
-      buf->highwater = 0;
-
-      if (add_buf_mem_to_freelist(buf))
-        return;
+  while (n) {
+    tor_assert(buf->head);
+    if (buf->head->datalen > n) {
+      buf->head->datalen -= n;
+      buf->head->data += n;
+      buf->datalen -= n;
+      return;
+    } else {
+      chunk_t *victim = buf->head;
+      n -= victim->datalen;
+      buf->datalen -= victim->datalen;
+      buf->head = victim->next;
+      if (buf->tail == victim)
+        buf->tail = NULL;
+      chunk_free(victim);
     }
   }
   check();
 }
 
-/** Make sure that the memory in buf ends with a zero byte. */
-static INLINE int
-buf_nul_terminate(buf_t *buf)
-{
-  if (buf_ensure_capacity(buf,buf->datalen+1)<0)
-    return -1;
-  *_buf_end(buf) = '\0';
-  return 0;
-}
-
 /** Create and return a new buf with capacity <b>size</b>.
  * (Used for testing). */
 buf_t *
 buf_new_with_capacity(size_t size)
 {
-  buf_t *buf;
-  buf = tor_malloc_zero(sizeof(buf_t));
-  buf->magic = BUFFER_MAGIC;
-  if (IS_FREELIST_SIZE(size)) {
-    buf_get_initial_mem(buf, size);
-  } else {
-    buf->cur = buf->mem = GUARDED_MEM(tor_malloc(ALLOC_LEN(size)));
-    SET_GUARDS(buf->mem, size);
-    buf->len = size;
-    buf->memsize = ALLOC_LEN(size);
-  }
-
-  assert_buf_ok(buf);
-  return buf;
+  buf_t *b = buf_new();
+  b->default_chunk_size = preferred_chunk_size(size);
+  return b;
 }
 
 /** Allocate and return a new buffer with default capacity. */
 buf_t *
 buf_new(void)
 {
-  return buf_new_with_capacity(INITIAL_BUF_SIZE);
+  buf_t *buf = tor_malloc_zero(sizeof(buf_t));
+  buf->magic = BUFFER_MAGIC;
+  buf->default_chunk_size = 4096;
+  return buf;
 }
 
 /** Remove all data from <b>buf</b>. */
 void
 buf_clear(buf_t *buf)
 {
+  chunk_t *chunk, *next;
   buf->datalen = 0;
-  buf->cur = buf->mem;
-  /* buf->len = buf->memsize; bad. */
+  for (chunk = buf->head; chunk; chunk = next) {
+    next = chunk->next;
+    chunk_free(chunk);
+  }
+  buf->head = buf->tail = NULL;
 }
 
 /** Return the number of bytes stored in <b>buf</b> */
@@ -553,55 +434,70 @@
   return buf->datalen;
 }
 
-/** Return the maximum bytes that can be stored in <b>buf</b> before buf
- * needs to resize. */
+/** Return the total storage capacity of all chunks used in <b>buf</b>. */
 size_t
-buf_capacity(const buf_t *buf)
+buf_allocation(const buf_t *buf)
 {
-  return buf->len;
+  size_t total = 0;
+  const chunk_t *chunk;
+  for (chunk = buf->head; chunk; chunk = chunk->next) {
+    total += chunk->memlen;
+  }
+  return total;
 }
 
-/** For testing only: Return a pointer to the raw memory stored in
- * <b>buf</b>. */
-const char *
-_buf_peek_raw_buffer(const buf_t *buf)
+/** Return the number of bytes that can be added to <b>buf</b> without
+ * performing any additional allocation. */
+size_t
+buf_slack(const buf_t *buf)
 {
-  return buf->cur;
+  if (!buf->tail)
+    return 0;
+  else
+    return CHUNK_REMAINING_CAPACITY(buf->tail);
 }
 
 /** Release storage held by <b>buf</b>. */
 void
 buf_free(buf_t *buf)
 {
-  char *oldmem;
-  assert_buf_ok(buf);
-  buf->magic = 0xDEADBEEF;
-  if (IS_FREELIST_SIZE(buf->len)) {
-    buf->datalen = 0; /* Avoid assert in add_buf_mem_to_freelist. */
-    add_buf_mem_to_freelist(buf);
+  buf_clear(buf);
+  buf->magic = 0xdeadbeef;
+  tor_free(buf);
+}
+
+/** Append a new chunk with enough capacity to hold <b>cap</b> bytes to the
+ * tail of <b>buf</b>. */
+static chunk_t *
+buf_add_chunk_with_capacity(buf_t *buf, size_t cap)
+{
+  chunk_t *chunk;
+  if (CHUNK_ALLOC_SIZE(cap) < buf->default_chunk_size) {
+    chunk = chunk_new_with_alloc_size(buf->default_chunk_size);
+  } else {
+    chunk = chunk_new_with_alloc_size(preferred_chunk_size(cap));
   }
-  if (buf->mem) {
-    /* The freelist didn't want the RAM. */
-    oldmem = RAW_MEM(buf->mem);
-    tor_free(oldmem);
+  if (buf->tail) {
+    tor_assert(buf->head);
+    buf->tail->next = chunk;
+    buf->tail = chunk;
+  } else {
+    tor_assert(!buf->head);
+    buf->head = buf->tail = chunk;
   }
-  tor_free(buf);
+  check();
+  return chunk;
 }
 
-/** Helper for read_to_buf(): read no more than at_most bytes from
- * socket s into buffer buf, starting at the position pos.  (Does not
- * check for overflow.)  Set *reached_eof to true on EOF.  Return
- * number of bytes read on success, 0 if the read would block, -1 on
- * failure.
- */
+/** DOCDOC */
 static INLINE int
-read_to_buf_impl(int s, size_t at_most, buf_t *buf,
-                 char *pos, int *reached_eof)
+read_to_chunk(buf_t *buf, chunk_t *chunk, int fd, size_t at_most,
+              int *reached_eof)
 {
   int read_result;
 
-//  log_fn(LOG_DEBUG,"reading at most %d bytes.",at_most);
-  read_result = tor_socket_recv(s, pos, at_most, 0);
+  tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= at_most);
+  read_result = tor_socket_recv(fd, CHUNK_WRITE_PTR(chunk), at_most, 0);
   if (read_result < 0) {
     int e = tor_socket_errno(s);
     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
@@ -613,93 +509,75 @@
     }
     return 0; /* would block. */
   } else if (read_result == 0) {
-    log_debug(LD_NET,"Encountered eof");
+    log_debug(LD_NET,"Encountered eof on fd %d", (int)fd);
     *reached_eof = 1;
     return 0;
-  } else { /* we read some bytes */
+  } else { /* actually got bytes. */
     buf->datalen += read_result;
-    if (buf->datalen > buf->highwater)
-      buf->highwater = buf->datalen;
-    log_debug(LD_NET,"Read %d bytes. %d on inbuf.",read_result,
+    chunk->datalen += read_result;
+    log_debug(LD_NET,"Read %d bytes. %d on inbuf.", read_result,
               (int)buf->datalen);
     return read_result;
   }
 }
 
+static INLINE int
+read_to_chunk_tls(buf_t *buf, chunk_t *chunk, tor_tls_t *tls,
+                  size_t at_most)
+{
+  int read_result;
+
+  tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= at_most);
+  read_result = tor_tls_read(tls, CHUNK_WRITE_PTR(chunk), at_most);
+  if (read_result < 0)
+    return read_result;
+  buf->datalen += read_result;
+  chunk->datalen += read_result;
+  return read_result;
+}
+
 /** Read from socket <b>s</b>, writing onto end of <b>buf</b>.  Read at most
  * <b>at_most</b> bytes, resizing the buffer as necessary.  If recv()
  * returns 0, set *<b>reached_eof</b> to 1 and return 0. Return -1 on error;
  * else return the number of bytes read.  Return 0 if recv() would
  * block.
+ *
+ * DOCDOC revise
  */
 int
 read_to_buf(int s, size_t at_most, buf_t *buf, int *reached_eof)
 {
-  int r;
-  char *next;
-  size_t at_start;
+  int r = 0;
+  size_t total_read = 0;
 
-  /* assert_buf_ok(buf); */
+  check();
   tor_assert(reached_eof);
-  tor_assert(s>=0);
+  tor_assert(s >= 0);
 
-  if (buf_ensure_capacity(buf,buf->datalen+at_most))
-    return -1;
-
-  if (at_most + buf->datalen > buf->len)
-    at_most = buf->len - buf->datalen; /* take the min of the two */
-
-  if (at_most == 0)
-    return 0; /* we shouldn't read anything */
-
-  next = _buf_end(buf);
-  _split_range(buf, next, &at_most, &at_start);
-
-  r = read_to_buf_impl(s, at_most, buf, next, reached_eof);
-  check();
-  if (r < 0 || (size_t)r < at_most) {
-    return r; /* Either error, eof, block, or no more to read. */
-  }
-
-  if (at_start) {
-    int r2;
-    tor_assert(_buf_end(buf) == buf->mem);
-    r2 = read_to_buf_impl(s, at_start, buf, buf->mem, reached_eof);
-    check();
-    if (r2 < 0) {
-      return r2;
+  while (at_most) {
+    size_t readlen = at_most;
+    chunk_t *chunk;
+    if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
+      chunk = buf_add_chunk_with_capacity(buf, at_most);
+      tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= readlen);
     } else {
-      r += r2;
+      size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
+      chunk = buf->tail;
+      if (cap < readlen)
+        readlen = cap;
     }
+
+    r = read_to_chunk(buf, chunk, s, readlen, reached_eof);
+    check();
+    if (r < 0)
+      return r; /* Error */
+    else if ((size_t)r < readlen) /* eof, block, or no more to read. */
+      return r + total_read;
+    total_read += r;
   }
   return r;
 }
 
-/** Helper for read_to_buf_tls(): read no more than <b>at_most</b>
- * bytes from the TLS connection <b>tls</b> into buffer <b>buf</b>,
- * starting at the position <b>next</b>.  (Does not check for overflow.)
- * Return number of bytes read on success, 0 if the read would block,
- * -1 on failure.
- */
-static INLINE int
-read_to_buf_tls_impl(tor_tls_t *tls, size_t at_most, buf_t *buf, char *next)
-{
-  int r;
-
-  log_debug(LD_NET,"before: %d on buf, %d pending, at_most %d.",
-            (int)buf_datalen(buf), (int)tor_tls_get_pending_bytes(tls),
-            (int)at_most);
-  r = tor_tls_read(tls, next, at_most);
-  if (r<0)
-    return r;
-  buf->datalen += r;
-  if (buf->datalen > buf->highwater)
-    buf->highwater = buf->datalen;
-  log_debug(LD_NET,"Read %d bytes. %d on inbuf; %d pending",r,
-            (int)buf->datalen,(int)tor_tls_get_pending_bytes(tls));
-  return r;
-}
-
 /** As read_to_buf, but reads from a TLS connection.
  *
  * Using TLS on OR connections complicates matters in two ways.
@@ -718,47 +596,36 @@
  * Second, the TLS stream's events do not correspond directly to network
  * events: sometimes, before a TLS stream can read, the network must be
  * ready to write -- or vice versa.
+ *
+ * DOCDOC revise
  */
 int
 read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf)
 {
-  int r;
-  char *next;
-  size_t at_start;
-
-  tor_assert(tls);
-  assert_buf_ok(buf);
-
-  log_debug(LD_NET,"start: %d on buf, %d pending, at_most %d.",
-            (int)buf_datalen(buf), (int)tor_tls_get_pending_bytes(tls),
-            (int)at_most);
-
-  if (buf_ensure_capacity(buf, at_most+buf->datalen))
-    return TOR_TLS_ERROR_MISC;
-
-  if (at_most + buf->datalen > buf->len)
-    at_most = buf->len - buf->datalen;
-
-  if (at_most == 0)
-    return 0;
-
-  next = _buf_end(buf);
-  _split_range(buf, next, &at_most, &at_start);
-
-  r = read_to_buf_tls_impl(tls, at_most, buf, next);
+  int r = 0;
+  size_t total_read = 0;
   check();
-  if (r < 0 || (size_t)r < at_most)
-    return r; /* Either error, eof, block, or no more to read. */
 
-  if (at_start) {
-    int r2;
-    tor_assert(_buf_end(buf) == buf->mem);
-    r2 = read_to_buf_tls_impl(tls, at_start, buf, buf->mem);
+  while (at_most) {
+    size_t readlen = at_most;
+    chunk_t *chunk;
+    if (!buf->tail || CHUNK_REMAINING_CAPACITY(buf->tail) < MIN_READ_LEN) {
+      chunk = buf_add_chunk_with_capacity(buf, at_most);
+      tor_assert(CHUNK_REMAINING_CAPACITY(chunk) >= readlen);
+    } else {
+      size_t cap = CHUNK_REMAINING_CAPACITY(buf->tail);
+      chunk = buf->tail;
+      if (cap < readlen)
+        readlen = cap;
+    }
+
+    r = read_to_chunk_tls(buf, chunk, tls, readlen);
     check();
-    if (r2 < 0)
-      return r2;
-    else
-      r += r2;
+    if (r < 0)
+      return r; /* Error */
+    else if ((size_t)r < readlen) /* eof, block, or no more to read. */
+      return r + total_read;
+    total_read += r;
   }
   return r;
 }
@@ -769,11 +636,13 @@
  * Return the number of bytes written on success, -1 on failure.
  */
 static INLINE int
-flush_buf_impl(int s, buf_t *buf, size_t sz, size_t *buf_flushlen)
+flush_chunk(int s, buf_t *buf, chunk_t *chunk, size_t sz,
+               size_t *buf_flushlen)
 {
   int write_result;
 
-  write_result = tor_socket_send(s, buf->cur, sz, 0);
+  tor_assert(sz <= chunk->datalen);
+  write_result = tor_socket_send(s, chunk->data, sz, 0);
   if (write_result < 0) {
     int e = tor_socket_errno(s);
     if (!ERRNO_IS_EAGAIN(e)) { /* it's a real error */
@@ -792,6 +661,27 @@
   }
 }
 
+static INLINE int
+flush_chunk_tls(tor_tls_t *tls, buf_t *buf, chunk_t *chunk,
+                     size_t sz, size_t *buf_flushlen)
+{
+  int r;
+  size_t forced;
+
+  forced = tor_tls_get_forced_write_size(tls);
+  if (forced > sz)
+    sz = forced;
+  tor_assert(sz <= chunk->datalen);
+  r = tor_tls_write(tls, chunk->data, sz);
+  if (r < 0)
+    return r;
+  *buf_flushlen -= r;
+  buf_remove_from_front(buf, r);
+  log_debug(LD_NET,"flushed %d bytes, %d ready to flush, %d remain.",
+            r,(int)*buf_flushlen,(int)buf->datalen);
+  return r;
+}
+
 /** Write data from <b>buf</b> to the socket <b>s</b>.  Write at most
  * <b>sz</b> bytes, decrement *<b>buf_flushlen</b> by
  * the number of bytes actually written, and remove the written bytes
@@ -803,78 +693,37 @@
 {
   int r;
   size_t flushed = 0;
-  size_t flushlen0, flushlen1;
-
-  /* assert_buf_ok(buf); */
   tor_assert(buf_flushlen);
-  tor_assert(s>=0);
+  tor_assert(s >= 0);
   tor_assert(*buf_flushlen <= buf->datalen);
   tor_assert(sz <= *buf_flushlen);
 
-  if (sz == 0) /* nothing to flush */
-    return 0;
-
-  flushlen0 = sz;
-  _split_range(buf, buf->cur, &flushlen0, &flushlen1);
-
-  r = flush_buf_impl(s, buf, flushlen0, buf_flushlen);
   check();
+  while (sz) {
+    size_t flushlen0;
+    tor_assert(buf->head);
+    if (buf->head->datalen >= sz)
+      flushlen0 = sz;
+    else
+      flushlen0 = buf->head->datalen;
 
-  log_debug(LD_NET,"%d: flushed %d bytes, %d ready to flush, %d remain.",
-            s,r,(int)*buf_flushlen,(int)buf->datalen);
-  if (r < 0 || (size_t)r < flushlen0)
-    return r; /* Error, or can't flush any more now. */
-  flushed = r;
-
-  if (flushlen1) {
-    tor_assert(buf->cur == buf->mem);
-    r = flush_buf_impl(s, buf, flushlen1, buf_flushlen);
+    r = flush_chunk(s, buf, buf->head, flushlen0, buf_flushlen);
     check();
-    log_debug(LD_NET,"%d: flushed %d bytes, %d ready to flush, %d remain.",
-              s,r,(int)*buf_flushlen,(int)buf->datalen);
-    if (r<0)
+    if (r < 0)
       return r;
     flushed += r;
+    sz -= r;
   }
   return flushed;
 }
 
-/** Helper for flush_buf_tls(): try to write <b>sz</b> bytes (or more if
- * required by a previous write) from buffer <b>buf</b> onto TLS object
- * <b>tls</b>.  On success, deduct the bytes written from
- * *<b>buf_flushlen</b>.  Return the number of bytes written on success, -1 on
- * failure.
- */
-static INLINE int
-flush_buf_tls_impl(tor_tls_t *tls, buf_t *buf, size_t sz, size_t *buf_flushlen)
-{
-  int r;
-  size_t forced;
-
-  forced = tor_tls_get_forced_write_size(tls);
-  if (forced > sz)
-    sz = forced;
-  r = tor_tls_write(tls, buf->cur, sz);
-  if (r < 0) {
-    return r;
-  }
-  *buf_flushlen -= r;
-  buf_remove_from_front(buf, r);
-  log_debug(LD_NET,"flushed %d bytes, %d ready to flush, %d remain.",
-            r,(int)*buf_flushlen,(int)buf->datalen);
-  return r;
-}
-
 /** As flush_buf(), but writes data to a TLS connection.
  */
 int
 flush_buf_tls(tor_tls_t *tls, buf_t *buf, size_t sz, size_t *buf_flushlen)
 {
   int r;
-  size_t flushed=0;
-  size_t flushlen0, flushlen1;
-  /* assert_buf_ok(buf); */
-  tor_assert(tls);
+  size_t flushed = 0;
   tor_assert(buf_flushlen);
   tor_assert(*buf_flushlen <= buf->datalen);
   tor_assert(sz <= *buf_flushlen);
@@ -883,26 +732,21 @@
    * have a partial record pending */
   check_no_tls_errors();
 
-  flushlen0 = sz;
-  _split_range(buf, buf->cur, &flushlen0, &flushlen1);
-  if (flushlen1) {
-    size_t forced = tor_tls_get_forced_write_size(tls);
-    tor_assert(forced <= flushlen0);
-  }
-
-  r = flush_buf_tls_impl(tls, buf, flushlen0, buf_flushlen);
   check();
-  if (r < 0 || (size_t)r < flushlen0)
-    return r; /* Error, or can't flush any more now. */
-  flushed = r;
+  while (sz) {
+    size_t flushlen0;
+    tor_assert(buf->head);
+    if (buf->head->datalen >= sz)
+      flushlen0 = sz;
+    else
+      flushlen0 = buf->head->datalen;
 
-  if (flushlen1) {
-    tor_assert(buf->cur == buf->mem);
-    r = flush_buf_tls_impl(tls, buf, flushlen1, buf_flushlen);
+    r = flush_chunk_tls(tls, buf, buf->head, flushlen0, buf_flushlen);
     check();
-    if (r<0)
+    if (r < 0)
       return r;
     flushed += r;
+    sz -= r;
   }
   return flushed;
 }
@@ -915,37 +759,28 @@
 int
 write_to_buf(const char *string, size_t string_len, buf_t *buf)
 {
-  char *next;
-  size_t len2;
+  if (!string_len)
+    return buf->datalen;
+  check();
 
-  /* append string to buf (growing as needed, return -1 if "too big")
-   * return total number of bytes on the buf
-   */
-
-  tor_assert(string);
-  /* assert_buf_ok(buf); */
-
-  if (buf_ensure_capacity(buf, buf->datalen+string_len)) {
-    log_warn(LD_MM, "buflen too small, can't hold %d bytes.",
-             (int)(buf->datalen+string_len));
-    return -1;
+  if (buf->tail && CHUNK_REMAINING_CAPACITY(buf->tail)) {
+    size_t copy = CHUNK_REMAINING_CAPACITY(buf->tail);
+    if (copy > string_len)
+      copy = string_len;
+    memcpy(CHUNK_WRITE_PTR(buf->tail), string, copy);
+    string_len -= copy;
+    string += copy;
+    buf->datalen += copy;
+    buf->tail->datalen += copy;
   }
 
-  next = _buf_end(buf);
-  _split_range(buf, next, &string_len, &len2);
-
-  memcpy(next, string, string_len);
-  buf->datalen += string_len;
-
-  if (len2) {
-    tor_assert(_buf_end(buf) == buf->mem);
-    memcpy(buf->mem, string+string_len, len2);
-    buf->datalen += len2;
+  if (string_len) {
+    chunk_t *newchunk = buf_add_chunk_with_capacity(buf, string_len);
+    memcpy(newchunk->data, string, string_len);
+    newchunk->datalen = string_len;
+    buf->datalen += string_len;
   }
-  if (buf->datalen > buf->highwater)
-    buf->highwater = buf->datalen;
-  log_debug(LD_NET,"added %d bytes to buf (now %d total).",
-            (int)string_len, (int)buf->datalen);
+
   check();
   return buf->datalen;
 }
@@ -954,25 +789,25 @@
  * onto <b>string</b>.
  */
 static INLINE void
-peek_from_buf(char *string, size_t string_len, buf_t *buf)
+peek_from_buf(char *string, size_t string_len, const buf_t *buf)
 {
-  size_t len2;
+  chunk_t *chunk;
 
-  /* There must be string_len bytes in buf; write them onto string,
-   * then memmove buf back (that is, remove them from buf).
-   *
-   * Return the number of bytes still on the buffer. */
-
   tor_assert(string);
   /* make sure we don't ask for too much */
   tor_assert(string_len <= buf->datalen);
   /* assert_buf_ok(buf); */
 
-  _split_range(buf, buf->cur, &string_len, &len2);
-
-  memcpy(string, buf->cur, string_len);
-  if (len2) {
-    memcpy(string+string_len,buf->mem,len2);
+  chunk = buf->head;
+  while (string_len) {
+    size_t copy = string_len;
+    tor_assert(chunk);
+    if (chunk->datalen < copy)
+      copy = chunk->datalen;
+    memcpy(string, chunk->data, copy);
+    string_len -= copy;
+    string += copy;
+    chunk = chunk->next;
   }
 }
 
@@ -1037,6 +872,7 @@
 int
 move_buf_to_buf(buf_t *buf_out, buf_t *buf_in, size_t *buf_flushlen)
 {
+  /*XXXX020 we can do way better here. */
   char b[4096];
   size_t cp, len;
   len = *buf_flushlen;
@@ -1085,16 +921,28 @@
   char *headers, *body, *p;
   size_t headerlen, bodylen, contentlen;
 
-  /* assert_buf_ok(buf); */
-  buf_normalize(buf);
+  check();
+  if (!buf->head)
+    return 0;
 
-  if (buf_nul_terminate(buf)<0) {
-    log_warn(LD_BUG,"Couldn't nul-terminate buffer");
-    return -1;
+  headers = buf->head->data;
+  /* See if CRLFCRLF is already in the head chunk.  If it is, we don't need
+   * to move or resize anything. */
+  body = (char*) tor_memmem(buf->head->data, buf->head->datalen,
+                            "\r\n\r\n", 4);
+  if (!body && buf->datalen > buf->head->datalen) {
+    buf_pullup(buf, max_headerlen, 1);
+    headers = buf->head->data;
+    /*XXX020 avoid searching the original part of the head chunk twice. */
+    body = (char*) tor_memmem(buf->head->data, buf->head->datalen,
+                              "\r\n\r\n", 4);
   }
-  headers = buf->cur;
-  body = strstr(headers,"\r\n\r\n");
+
   if (!body) {
+    if (buf->head->datalen >= max_headerlen) {
+      log_debug(LD_HTTP,"headers too long.");
+      return -1;
+    }
     log_debug(LD_HTTP,"headers not all here yet.");
     return 0;
   }
@@ -1141,17 +989,17 @@
   /* all happy. copy into the appropriate places, and return 1 */
   if (headers_out) {
     *headers_out = tor_malloc(headerlen+1);
-    memcpy(*headers_out,buf->cur,headerlen);
+    fetch_from_buf(*headers_out, headerlen, buf);
     (*headers_out)[headerlen] = 0; /* nul terminate it */
   }
   if (body_out) {
     tor_assert(body_used);
     *body_used = bodylen;
     *body_out = tor_malloc(bodylen+1);
-    memcpy(*body_out,buf->cur+headerlen,bodylen);
+    fetch_from_buf(*body_out, bodylen, buf);
     (*body_out)[bodylen] = 0; /* nul terminate it */
   }
-  buf_remove_from_front(buf, headerlen+bodylen);
+  check();
   return 1;
 }
 
@@ -1187,6 +1035,7 @@
   unsigned int len;
   char tmpbuf[INET_NTOA_BUF_LEN];
   uint32_t destip;
+  uint8_t socksver;
   enum {socks4, socks4a} socks4_prot = socks4a;
   char *next, *startaddr;
   struct in_addr in;
@@ -1197,18 +1046,23 @@
 
   if (buf->datalen < 2) /* version and another byte */
     return 0;
-  buf_normalize(buf);
 
-  switch (*(buf->cur)) { /* which version of socks? */
+  buf_pullup(buf, 128, 0);
+  tor_assert(buf->head && buf->head->datalen >= 2);
 
+  socksver = *buf->head->data;
+
+  switch (socksver) { /* which version of socks? */
+
     case 5: /* socks5 */
 
       if (req->socks_version != 5) { /* we need to negotiate a method */
-        unsigned char nummethods = (unsigned char)*(buf->cur+1);
+        unsigned char nummethods = (unsigned char)*(buf->head->data+1);
         tor_assert(!req->socks_version);
         if (buf->datalen < 2u+nummethods)
           return 0;
-        if (!nummethods || !memchr(buf->cur+2, 0, nummethods)) {
+        buf_pullup(buf, 2u+nummethods, 0);
+        if (!nummethods || !memchr(buf->head->data+2, 0, nummethods)) {
           log_warn(LD_APP,
                    "socks5: offered methods don't include 'no auth'. "
                    "Rejecting.");
@@ -1232,7 +1086,8 @@
       log_debug(LD_APP,"socks5: checking request");
       if (buf->datalen < 8) /* basic info plus >=2 for addr plus 2 for port */
         return 0; /* not yet */
-      req->command = (unsigned char) *(buf->cur+1);
+      tor_assert(buf->head->datalen >= 8);
+      req->command = (unsigned char) *(buf->head->data+1);
       if (req->command != SOCKS_COMMAND_CONNECT &&
           req->command != SOCKS_COMMAND_RESOLVE &&
           req->command != SOCKS_COMMAND_RESOLVE_PTR) {
@@ -1241,13 +1096,13 @@
                  req->command);
         return -1;
       }
-      switch (*(buf->cur+3)) { /* address type */
+      switch (*(buf->head->data+3)) { /* address type */
         case 1: /* IPv4 address */
           log_debug(LD_APP,"socks5: ipv4 address type");
           if (buf->datalen < 10) /* ip/port there? */
             return 0; /* not yet */
 
-          destip = ntohl(*(uint32_t*)(buf->cur+4));
+          destip = ntohl(*(uint32_t*)(buf->head->data+4));
           in.s_addr = htonl(destip);
           tor_inet_ntoa(&in,tmpbuf,sizeof(tmpbuf));
           if (strlen(tmpbuf)+1 > MAX_SOCKS_ADDR_LEN) {
@@ -1258,7 +1113,7 @@
             return -1;
           }
           strlcpy(req->address,tmpbuf,sizeof(req->address));
-          req->port = ntohs(*(uint16_t*)(buf->cur+8));
+          req->port = ntohs(*(uint16_t*)(buf->head->data+8));
           buf_remove_from_front(buf, 10);
           if (req->command != SOCKS_COMMAND_RESOLVE_PTR &&
               !addressmap_have_mapping(req->address) &&
@@ -1286,18 +1141,20 @@
                      "hostname type. Rejecting.");
             return -1;
           }
-          len = (unsigned char)*(buf->cur+4);
+          len = (unsigned char)*(buf->head->data+4);
           if (buf->datalen < 7+len) /* addr/port there? */
             return 0; /* not yet */
+          buf_pullup(buf, 7+len, 0);
+          tor_assert(buf->head->datalen >= 7+len);
           if (len+1 > MAX_SOCKS_ADDR_LEN) {
             log_warn(LD_APP,
                      "socks5 hostname is %d bytes, which doesn't fit in "
                      "%d. Rejecting.", len+1,MAX_SOCKS_ADDR_LEN);
             return -1;
           }
-          memcpy(req->address,buf->cur+5,len);
+          memcpy(req->address,buf->head->data+5,len);
           req->address[len] = 0;
-          req->port = ntohs(get_uint16(buf->cur+5+len));
+          req->port = ntohs(get_uint16(buf->head->data+5+len));
           buf_remove_from_front(buf, 5+len+2);
           if (!tor_strisprint(req->address) || strchr(req->address,'\"')) {
             log_warn(LD_PROTOCOL,
@@ -1314,7 +1171,7 @@
           return 1;
         default: /* unsupported */
           log_warn(LD_APP,"socks5: unsupported address type %d. Rejecting.",
-                   *(buf->cur+3));
+                   (int) *(buf->head->data+3));
           return -1;
       }
       tor_assert(0);
@@ -1325,8 +1182,8 @@
       req->socks_version = 4;
       if (buf->datalen < SOCKS4_NETWORK_LEN) /* basic info available? */
         return 0; /* not yet */
-
-      req->command = (unsigned char) *(buf->cur+1);
+      buf_pullup(buf, 1280, 0);
+      req->command = (unsigned char) *(buf->head->data+1);
       if (req->command != SOCKS_COMMAND_CONNECT &&
           req->command != SOCKS_COMMAND_RESOLVE) {
         /* not a connect or resolve? we don't support it. (No resolve_ptr with
@@ -1336,8 +1193,8 @@
         return -1;
       }
 
-      req->port = ntohs(*(uint16_t*)(buf->cur+2));
-      destip = ntohl(*(uint32_t*)(buf->mem+4));
+      req->port = ntohs(*(uint16_t*)(buf->head->data+2));
+      destip = ntohl(*(uint32_t*)(buf->head->data+4));
       if ((!req->port && req->command!=SOCKS_COMMAND_RESOLVE) || !destip) {
         log_warn(LD_APP,"socks4: Port or DestIP is zero. Rejecting.");
         return -1;
@@ -1356,13 +1213,17 @@
         socks4_prot = socks4;
       }
 
-      next = memchr(buf->cur+SOCKS4_NETWORK_LEN, 0,
-                    buf->datalen-SOCKS4_NETWORK_LEN);
+      next = memchr(buf->head->data+SOCKS4_NETWORK_LEN, 0,
+                    buf->head->datalen-SOCKS4_NETWORK_LEN);
       if (!next) {
+        if (buf->head->datalen >= 1024) {
+          log_debug(LD_APP, "Socks4 user name too long; rejecting.");
+          return -1;
+        }
         log_debug(LD_APP,"socks4: Username not here yet.");
         return 0;
       }
-      tor_assert(next < buf->cur+buf->datalen);
+      tor_assert(next < CHUNK_WRITE_PTR(buf->head));
 
       startaddr = NULL;
       if (socks4_prot != socks4a &&
@@ -1384,13 +1245,17 @@
           return -1;
       }
       if (socks4_prot == socks4a) {
-        if (next+1 == buf->cur+buf->datalen) {
+        if (next+1 == CHUNK_WRITE_PTR(buf->head)) {
           log_debug(LD_APP,"socks4: No part of destaddr here yet.");
           return 0;
         }
         startaddr = next+1;
-        next = memchr(startaddr, 0, buf->cur+buf->datalen-startaddr);
+        next = memchr(startaddr, 0, CHUNK_WRITE_PTR(buf->head)-startaddr);
         if (!next) {
+          if (buf->head->datalen >= 1024) {
+            log_debug(LD_APP,"socks4: Destaddr too long.");
+            return -1;
+          }
           log_debug(LD_APP,"socks4: Destaddr not all here yet.");
           return 0;
         }
@@ -1398,7 +1263,7 @@
           log_warn(LD_APP,"socks4: Destaddr too long. Rejecting.");
           return -1;
         }
-        tor_assert(next < buf->cur+buf->datalen);
+        // tor_assert(next < buf->cur+buf->datalen);
 
         if (log_sockstype)
           log_notice(LD_APP,
@@ -1417,7 +1282,7 @@
         return -1;
       }
       /* next points to the final \0 on inbuf */
-      buf_remove_from_front(buf, next-buf->cur+1);
+      buf_remove_from_front(buf, next - buf->head->data + 1);
       return 1;
 
     case 'G': /* get */
@@ -1455,9 +1320,9 @@
     default: /* version is not socks4 or socks5 */
       log_warn(LD_APP,
                "Socks version %d not recognized. (Tor is not an http proxy.)",
-               *(buf->cur));
+               *(buf->head->data));
       {
-        char *tmp = tor_strndup(buf->cur, 8);
+        char *tmp = tor_strndup(buf->head->data, 8); /*XXXX what if longer?*/
         control_event_client_status(LOG_WARN,
                                     "SOCKS_UNKNOWN_PROTOCOL DATA=\"%s\"",
                                     escaped(tmp));
@@ -1483,27 +1348,12 @@
   return 0;
 }
 
-/** Helper: return a pointer to the first instance of <b>c</b> in the
- * <b>len</b>characters after <b>start</b> on <b>buf</b>. Return NULL if the
- * character isn't found. */
-static char *
-find_char_on_buf(buf_t *buf, char *start, size_t len, char c)
-{
-  size_t len_rest;
-  char *cp;
-  _split_range(buf, start, &len, &len_rest);
-  cp = memchr(start, c, len);
-  if (cp || !len_rest)
-    return cp;
-  return memchr(buf->mem, c, len_rest);
-}
-
 /** Try to read a single LF-terminated line from <b>buf</b>, and write it,
  * NUL-terminated, into the *<b>data_len</b> byte buffer at <b>data_out</b>.
  * Set *<b>data_len</b> to the number of bytes in the line, not counting the
  * terminating NUL.  Return 1 if we read a whole line, return 0 if we don't
  * have a whole line yet, and return -1 if the line length exceeds
- *<b>data_len</b>.
+ * *<b>data_len</b>.
  */
 int
 fetch_from_buf_line(buf_t *buf, char *data_out, size_t *data_len)
@@ -1511,11 +1361,15 @@
   char *cp;
   size_t sz;
 
-  size_t remaining = buf->datalen - _buf_offset(buf,buf->cur);
-  cp = find_char_on_buf(buf, buf->cur, remaining, '\n');
-  if (!cp)
+  if (!buf->head)
     return 0;
-  sz = _buf_offset(buf, cp);
+  /* XXXX020 pull up less aggressively. */
+  buf_pullup(buf, *data_len, 0);
+  cp = memchr(buf->head->data, '\n', buf->head->datalen);
+  if (!cp) {
+    return 0;
+  }
+  sz = cp - buf->head->data;
   if (sz+2 > *data_len) {
     *data_len = sz+2;
     return -1;
@@ -1539,12 +1393,15 @@
   size_t old_avail, avail;
   int over = 0;
   do {
-    buf_ensure_capacity(buf, buf->datalen + 1024);
-    next = _buf_end(buf);
-    if (next < buf->cur)
-      old_avail = avail = buf->cur - next;
-    else
-      old_avail = avail = (buf->mem + buf->len) - next;
+    int need_new_chunk = 0;
+    if (!buf->tail || ! CHUNK_REMAINING_CAPACITY(buf->tail)) {
+      size_t cap = data_len / 4;
+      if (cap > MAX_CHUNK_ALLOC) /* Add a function for this. */
+        cap = MAX_CHUNK_ALLOC;
+      buf_add_chunk_with_capacity(buf, cap);
+    }
+    next = CHUNK_WRITE_PTR(buf->tail);
+    avail = old_avail = CHUNK_REMAINING_CAPACITY(buf->tail);
     switch (tor_zlib_process(state, &next, &avail, &data, &data_len, done)) {
       case TOR_ZLIB_DONE:
         over = 1;
@@ -1556,28 +1413,21 @@
           over = 1;
         break;
       case TOR_ZLIB_BUF_FULL:
-        if (avail && buf->len >= 1024 + buf->datalen) {
-          /* Zlib says we need more room (ZLIB_BUF_FULL), and we're not about
-           * to wrap around (avail != 0), and resizing won't actually make us
-           * un-full: we're at the end of the buffer, and zlib refuses to
-           * append more here, but there's a pile of free space at the start
-           * of the buffer (about 1K).  So chop a few characters off the
-           * end of the buffer.  This feels silly; anybody got a better hack?
-           *
-           * (We don't just want to expand the buffer nevertheless. Consider a
-           * 1/3 full buffer with a single byte free at the end. zlib will
-           * often refuse to append to that, and so we want to use the
-           * beginning, not double the buffer to be just 1/6 full.)
-           */
-          tor_assert(next >= buf->cur);
-          buf->len -= avail;
+        if (avail) {
+          /* Zlib says we need more room (ZLIB_BUF_FULL).  Start a new chunk
+           * automatically, whether we're going to or not. */
+          need_new_chunk = 1;
         }
         break;
     }
     buf->datalen += old_avail - avail;
-    if (buf->datalen > buf->highwater)
-      buf->highwater = buf->datalen;
+    buf->tail->datalen += old_avail - avail;
+    if (need_new_chunk) {
+      buf_add_chunk_with_capacity(buf, data_len/4);
+    }
+
   } while (!over);
+  check();
   return 0;
 }
 
@@ -1588,26 +1438,42 @@
 {
   tor_assert(buf);
   tor_assert(buf->magic == BUFFER_MAGIC);
-  tor_assert(buf->highwater <= buf->len);
-  tor_assert(buf->datalen <= buf->highwater);
 
-  if (buf->mem) {
-    tor_assert(buf->cur >= buf->mem);
-    tor_assert(buf->cur < buf->mem+buf->len);
-    tor_assert(buf->memsize == ALLOC_LEN(buf->len));
+  if (! buf->head) {
+    tor_assert(!buf->tail);
+    tor_assert(buf->datalen == 0);
   } else {
-    tor_assert(!buf->cur);
-    tor_assert(!buf->len);
-    tor_assert(!buf->memsize);
+    chunk_t *ch;
+    size_t total = 0;
+    tor_assert(buf->tail);
+    for (ch = buf->head; ch; ch = ch->next) {
+      total += ch->datalen;
+      tor_assert(ch->datalen <= ch->memlen);
+      tor_assert(ch->data >= &ch->mem[0]);
+      tor_assert(ch->data < &ch->mem[0]+ch->memlen);
+      tor_assert(ch->data+ch->datalen <= &ch->mem[0] + ch->memlen);
+      if (!ch->next)
+        tor_assert(ch == buf->tail);
+    }
+    tor_assert(buf->datalen == total);
   }
+}
 
-#ifdef SENTINELS
-  if (buf->mem) {
-    uint32_t u32 = get_uint32(buf->mem - 4);
-    tor_assert(u32 == START_MAGIC);
-    u32 = get_uint32(buf->mem + buf->memsize - 8);
-    tor_assert(u32 == END_MAGIC);
+/** Log an error and exit if <b>fl</b> is corrupted.
+ */
+static void
+assert_freelist_ok(chunk_freelist_t *fl)
+{
+  chunk_t *ch;
+  int n;
+  tor_assert(fl->alloc_size > 0);
+  n = 0;
+  for (ch = fl->head; ch; ch = ch->next) {
+    tor_assert(CHUNK_ALLOC_SIZE(ch->memlen) == fl->alloc_size);
+    ++n;
   }
-#endif
+  tor_assert(n == fl->cur_length);
+  tor_assert(n >= fl->lowest_length);
+  tor_assert(n <= fl->max_length);
 }
 

Modified: tor/trunk/src/or/connection.c
===================================================================
--- tor/trunk/src/or/connection.c	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/src/or/connection.c	2007-12-26 00:12:08 UTC (rev 12983)
@@ -1873,7 +1873,7 @@
 connection_read_to_buf(connection_t *conn, int *max_to_read)
 {
   int result, at_most = *max_to_read;
-  size_t bytes_in_buf, more_to_read;
+  size_t slack_in_buf, more_to_read;
   size_t n_read = 0, n_written = 0;
 
   if (at_most == -1) { /* we need to initialize it */
@@ -1882,11 +1882,11 @@
     at_most = connection_bucket_read_limit(conn, time(NULL));
   }
 
-  bytes_in_buf = buf_capacity(conn->inbuf) - buf_datalen(conn->inbuf);
+  slack_in_buf = buf_slack(conn->inbuf);
  again:
-  if ((size_t)at_most > bytes_in_buf && bytes_in_buf >= 1024) {
-    more_to_read = at_most - bytes_in_buf;
-    at_most = bytes_in_buf;
+  if ((size_t)at_most > slack_in_buf && slack_in_buf >= 1024) {
+    more_to_read = at_most - slack_in_buf;
+    at_most = slack_in_buf;
   } else {
     more_to_read = 0;
   }
@@ -1997,8 +1997,7 @@
   connection_buckets_decrement(conn, time(NULL), n_read, n_written);
 
   if (more_to_read && result == at_most) {
-    bytes_in_buf = buf_capacity(conn->inbuf) - buf_datalen(conn->inbuf);
-    tor_assert(bytes_in_buf < 1024);
+    slack_in_buf = buf_slack(conn->inbuf);
     at_most = more_to_read;
     goto again;
   }
@@ -2785,11 +2784,11 @@
     ++n_conns_by_type[tp];
     if (c->inbuf) {
       used_by_type[tp] += buf_datalen(c->inbuf);
-      alloc_by_type[tp] += buf_capacity(c->inbuf);
+      alloc_by_type[tp] += buf_allocation(c->inbuf);
     }
     if (c->outbuf) {
       used_by_type[tp] += buf_datalen(c->outbuf);
-      alloc_by_type[tp] += buf_capacity(c->outbuf);
+      alloc_by_type[tp] += buf_allocation(c->outbuf);
     }
   });
   for (i=0; i <= _CONN_TYPE_MAX; ++i) {

Modified: tor/trunk/src/or/main.c
===================================================================
--- tor/trunk/src/or/main.c	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/src/or/main.c	2007-12-26 00:12:08 UTC (rev 12983)
@@ -1606,13 +1606,13 @@
           "Conn %d: %d bytes waiting on inbuf (len %d, last read %d secs ago)",
           i,
           (int)buf_datalen(conn->inbuf),
-          (int)buf_capacity(conn->inbuf),
+          (int)buf_allocation(conn->inbuf),
           (int)(now - conn->timestamp_lastread));
       log(severity,LD_GENERAL,
           "Conn %d: %d bytes waiting on outbuf "
           "(len %d, last written %d secs ago)",i,
           (int)buf_datalen(conn->outbuf),
-          (int)buf_capacity(conn->outbuf),
+          (int)buf_allocation(conn->outbuf),
           (int)(now - conn->timestamp_lastwritten));
     }
     circuit_dump_by_conn(conn, severity); /* dump info about all the circuits

Modified: tor/trunk/src/or/or.h
===================================================================
--- tor/trunk/src/or/or.h	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/src/or/or.h	2007-12-26 00:12:08 UTC (rev 12983)
@@ -2439,7 +2439,8 @@
 void buf_dump_freelist_sizes(int severity);
 
 size_t buf_datalen(const buf_t *buf);
-size_t buf_capacity(const buf_t *buf);
+size_t buf_allocation(const buf_t *buf);
+size_t buf_slack(const buf_t *buf);
 const char *_buf_peek_raw_buffer(const buf_t *buf);
 
 int read_to_buf(int s, size_t at_most, buf_t *buf, int *reached_eof);

Modified: tor/trunk/src/or/test.c
===================================================================
--- tor/trunk/src/or/test.c	2007-12-26 00:12:05 UTC (rev 12982)
+++ tor/trunk/src/or/test.c	2007-12-26 00:12:08 UTC (rev 12983)
@@ -127,7 +127,7 @@
   if (!(buf = buf_new()))
     test_fail();
 
-  test_eq(buf_capacity(buf), 4096);
+  //test_eq(buf_capacity(buf), 4096);
   test_eq(buf_datalen(buf), 0);
 
   /****
@@ -169,25 +169,25 @@
 
   /* Okay, now make sure growing can work. */
   buf = buf_new_with_capacity(16);
-  test_eq(buf_capacity(buf), 16);
+  //test_eq(buf_capacity(buf), 16);
   write_to_buf(str+1, 255, buf);
-  test_eq(buf_capacity(buf), 256);
+  //test_eq(buf_capacity(buf), 256);
   fetch_from_buf(str2, 254, buf);
   test_memeq(str+1, str2, 254);
-  test_eq(buf_capacity(buf), 256);
+  //test_eq(buf_capacity(buf), 256);
   assert_buf_ok(buf);
   write_to_buf(str, 32, buf);
-  test_eq(buf_capacity(buf), 256);
+  //test_eq(buf_capacity(buf), 256);
   assert_buf_ok(buf);
   write_to_buf(str, 256, buf);
   assert_buf_ok(buf);
-  test_eq(buf_capacity(buf), 512);
+  //test_eq(buf_capacity(buf), 512);
   test_eq(buf_datalen(buf), 33+256);
   fetch_from_buf(str2, 33, buf);
   test_eq(*str2, str[255]);
 
   test_memeq(str2+1, str, 32);
-  test_eq(buf_capacity(buf), 512);
+  //test_eq(buf_capacity(buf), 512);
   test_eq(buf_datalen(buf), 256);
   fetch_from_buf(str2, 256, buf);
   test_memeq(str, str2, 256);
@@ -198,7 +198,7 @@
   for (j=0;j<67;++j) {
     write_to_buf(str,255, buf);
   }
-  test_eq(buf_capacity(buf), 33668);
+  //test_eq(buf_capacity(buf), 33668);
   test_eq(buf_datalen(buf), 17085);
   for (j=0; j < 40; ++j) {
     fetch_from_buf(str2, 255,buf);
@@ -218,7 +218,7 @@
   for (j=0;j<80;++j) {
     write_to_buf(str,255, buf);
   }
-  test_eq(buf_capacity(buf),33668);
+  //test_eq(buf_capacity(buf),33668);
   for (j=0; j < 120; ++j) {
     fetch_from_buf(str2, 255,buf);
     test_memeq(str2, str, 255);
@@ -275,14 +275,14 @@
   printf("%s\n", strerror(errno));
   test_eq(i, 10);
   test_eq(eof, 0);
-  test_eq(buf_capacity(buf), 4096);
+  //test_eq(buf_capacity(buf), 4096);
   test_eq(buf_datalen(buf), 10);
 
   test_memeq(str, (char*)_buf_peek_raw_buffer(buf), 10);
 
   /* Test reading 0 bytes. */
   i = read_to_buf(s, 0, buf, &eof);
-  test_eq(buf_capacity(buf), 512*1024);
+  //test_eq(buf_capacity(buf), 512*1024);
   test_eq(buf_datalen(buf), 10);
   test_eq(eof, 0);
   test_eq(i, 0);
@@ -290,7 +290,7 @@
   /* Now test when buffer is filled exactly. */
   buf2 = buf_new_with_capacity(6);
   i = read_to_buf(s, 6, buf2, &eof);
-  test_eq(buf_capacity(buf2), 6);
+  //test_eq(buf_capacity(buf2), 6);
   test_eq(buf_datalen(buf2), 6);
   test_eq(eof, 0);
   test_eq(i, 6);
@@ -300,7 +300,7 @@
   /* Now test when buffer is filled with more data to read. */
   buf2 = buf_new_with_capacity(32);
   i = read_to_buf(s, 128, buf2, &eof);
-  test_eq(buf_capacity(buf2), 128);
+  //test_eq(buf_capacity(buf2), 128);
   test_eq(buf_datalen(buf2), 32);
   test_eq(eof, 0);
   test_eq(i, 32);