[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

Re: [Libevent-users] Re: libevent-2.0.8-rc/evdns.c:2672: Assertion (req)->handle && (req)->handle->current_req == (req) failed in evdns_cancel_request



On Wed, Nov 3, 2010 at 11:33 AM, Nick Mathewson <nickm@xxxxxxxxxxxxx> wrote:
> On Wed, Nov 3, 2010 at 7:41 AM, Denis Bilenko <denis.bilenko@xxxxxxxxx> wrote:
>> So what happens here, is that eventually reply_handle is called, which calls
>>
>> reply_schedule_callback(req, ttl, 0, reply); in evdns.c:872
>> and then
>> request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1); in evdns.c:876
>> which calls mm_free(req->handle)
>>
>> When I call cancel I pass this freed handle to evdns_cancel_request
>> and that causes crash.
>>
>> So the problem here is that the evdns_request structure is freed
>> before the user's callback is called and there's no way for the client
>> code to know when it's OK to cancel the request.
>>
>> I might be missing something, but why go through
>> reply_schedule_callback at all? Why not call the user's callback
>> immediately and avoid extra step which makes cancel() unsafe?
>
> Locking.  When we hit the point where we need to schedule a callback
> (or call it directly) we're pretty deep in the guts of the dns code,
> and we hold the lock on the evdns base.  We can't trivially drop the
> lock before calling the user callback, but holding it while invoking
> the user callback was an invitation for deadlock.
>
> Probably the right answer here is to add a reference count to the
> request, and not actually free it until the reference count hits zero.

Actually, we can go even simpler.  Here's the patch I've got in mind.

There may be better ways to do this, and simplify the code even more,
but I'd rather keep changes to this rats'-nest nice and minimal while
we're trying to stabilize it.

Denis, does this fix stuff for you?

-- 
Nick
From d1065f1f227f45e0137b868586920e9e65976f40 Mon Sep 17 00:00:00 2001
From: Nick Mathewson <nickm@xxxxxxxxxxxxxx>
Date: Wed, 3 Nov 2010 12:37:37 -0400
Subject: [PATCH] Don't free evdns_request handles until after the callback is invoked

Previously, once the callback was scheduled, it was unsafe to cancel
a request, but there was no way to tell that.  Now it is safe to
cancel a request until the callback is invoked, at which point it
isn't.

Found and diagnosed by Denis Bilenko.
---
 evdns.c |   44 +++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/evdns.c b/evdns.c
index 0d1d32e..de20a85 100644
--- a/evdns.c
+++ b/evdns.c
@@ -143,6 +143,10 @@
  * 'struct request' instances being created over its lifetime. */
 struct evdns_request {
 	struct request *current_req;
+	struct evdns_base *base;
+
+	int pending_cb; /* Waiting for its callback to be invoked; not
+			 * owned by event base any more. */
 
 	/* elements used by the searching code */
 	int search_index;
@@ -653,11 +657,20 @@ request_finished(struct request *const req, struct request **head, int free_hand
 
 	if (req->handle) {
 		EVUTIL_ASSERT(req->handle->current_req == req);
+
 		if (free_handle) {
 			search_request_finished(req->handle);
-			mm_free(req->handle);
-		} else
+			if (! req->handle->pending_cb) {
+				/* If we're planning to run the callback,
+				 * don't free the handle until later. */
+				mm_free(req->handle);
+			}
+			req->handle->current_req = NULL;
+			req->handle = NULL; /* If we have a bug, let's crash
+					     * early */
+		} else {
 			req->handle->current_req = NULL;
+		}
 	}
 
 	mm_free(req);
@@ -723,6 +736,7 @@ evdns_requests_pump_waiting_queue(struct evdns_base *base) {
 /* TODO(nickm) document */
 struct deferred_reply_callback {
 	struct deferred_cb deferred;
+	struct evdns_request *handle;
 	u8 request_type;
 	u8 have_reply;
 	u32 ttl;
@@ -769,6 +783,10 @@ reply_run_callback(struct deferred_cb *d, void *user_pointer)
 		EVUTIL_ASSERT(0);
 	}
 
+	if (cb->handle && cb->handle->pending_cb) {
+		mm_free(cb->handle);
+	}
+
 	mm_free(cb);
 }
 
@@ -788,6 +806,11 @@ reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct repl
 		memcpy(&d->reply, reply, sizeof(struct reply));
 	}
 
+	if (req->handle) {
+		req->handle->pending_cb = 1;
+		d->handle = req->handle;
+	}
+
 	event_deferred_cb_init(&d->deferred, reply_run_callback,
 	    req->user_pointer);
 	event_deferred_cb_schedule(
@@ -2637,8 +2660,10 @@ request_new(struct evdns_base *base, struct evdns_request *handle, int type,
 	req->ns = issuing_now ? nameserver_pick(base) : NULL;
 	req->next = req->prev = NULL;
 	req->handle = handle;
-	if (handle)
+	if (handle) {
 		handle->current_req = req;
+		handle->base = base;
+	}
 
 	return req;
 err1:
@@ -2671,10 +2696,19 @@ evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
 
 	ASSERT_VALID_REQUEST(req);
 
-	if (!base)
-		base = req->base;
+	if (!base) {
+		/* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
+		base = handle->base;
+		if (!base)
+			base = req->base;
+	}
 
 	EVDNS_LOCK(base);
+	if (handle->pending_cb) {
+		EVDNS_UNLOCK(base);
+		return;
+	}
+
 	reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
 	if (req->ns) {
 		/* remove from inflight queue */
-- 
1.7.2.3