[sr-dev] git:kamailio_3.0: tcp: fix fd passing bug

Andrei Pelinescu-Onciul andrei at iptel.org
Thu Aug 19 16:20:01 CEST 2010


Module: sip-router
Branch: kamailio_3.0
Commit: 66619631879132e3d93fd858fa81c79ac9788617
URL:    http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=66619631879132e3d93fd858fa81c79ac9788617

Author: Andrei Pelinescu-Onciul <andrei at iptel.org>
Committer: Andrei Pelinescu-Onciul <andrei at iptel.org>
Date:   Wed Jun 16 21:03:06 2010 +0200

tcp: fix fd passing bug

If connections are opened and closed very quickly when data is
sent on them, it is possible that a connection gets closed
(close() inside tcp_main) while a process waits for its fd, just
before tcp_main attempts to send the fd.  In this case the fd
sending will fail (one cannot send a closed fd) and the process
that asked for it will remain waiting forever.
The fix always checks before attempting to send the fd if the fd is
still open and the connection is not in a "bad" state. If not,
 a new error response is sent (no fd and connection == NULL).

(backported from 9da6fae72b9883ab8dbbb4e681c4d4e96d6549e4)

---

 tcp_main.c |   51 +++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/tcp_main.c b/tcp_main.c
index 4840ddc..6a6c4c0 100644
--- a/tcp_main.c
+++ b/tcp_main.c
@@ -2050,14 +2050,25 @@ int tcp_send(struct dest_info* dst, union sockaddr_union* from,
 				do_close_fd=0;
 				goto release_c;
 			}
-			if (unlikely(c!=tmp)){
-				LOG(L_CRIT, "BUG: tcp_send: get_fd: got different connection:"
+			/* handle fd closed or bad connection/error
+				(it's possible that this happened in the time between
+				we found the intial connection and the time when we get
+				the fd)
+			 */
+			if (unlikely(c!=tmp || fd==-1 || c->state==S_CONN_BAD)){
+				if (unlikely(c!=tmp && tmp!=0))
+					BUG("tcp_send: get_fd: got different connection:"
 						"  %p (id= %d, refcnt=%d state=%d) != "
 						"  %p (n=%d)\n",
 						  c,   c->id,   atomic_get(&c->refcnt),   c->state,
 						  tmp, n
-				   );
+						);
 				n=-1; /* fail */
+				/* don't cache fd & close it */
+				do_close_fd = (fd==-1)?0:1;
+#ifdef TCP_FD_CACHE
+				use_fd_cache = 0;
+#endif /* TCP_FD_CACHE */
 				goto end;
 			}
 			DBG("tcp_send: after receive_fd: c= %p n=%d fd=%d\n",c, n, fd);
@@ -2721,6 +2732,12 @@ inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
 	struct send_fd_info* t;
 	
 	for (p=t=&q->data[0]; p<q->crt; p++){
+		if (unlikely(p->tcp_conn->state == S_CONN_BAD ||
+					 p->tcp_conn->flags & F_CONN_FD_CLOSED ||
+					 p->tcp_conn->s ==-1)) {
+			/* bad and/or already closed connection => remove */
+			goto rm_con;
+		}
 		if (unlikely(send_fd(p->unix_sock, &(p->tcp_conn),
 					sizeof(struct tcp_connection*), p->tcp_conn->s)<=0)){
 			if ( ((errno==EAGAIN)||(errno==EWOULDBLOCK)) && 
@@ -2736,7 +2753,11 @@ inline static void send_fd_queue_run(struct tcp_send_fd_q* q)
 						   p->unix_sock, (long)(p-&q->data[0]), p->retries,
 						   p->tcp_conn, p->tcp_conn->s, errno,
 						   strerror(errno));
+rm_con:
 #ifdef TCP_ASYNC
+				/* if a connection is on the send_fd queue it means it's
+				   not watched for read anymore => could be watched only for
+				   write */
 				if (p->tcp_conn->flags & F_CONN_WRITE_W){
 					io_watch_del(&io_h, p->tcp_conn->s, -1, IO_FD_CLOSING);
 					p->tcp_conn->flags &=~F_CONN_WRITE_W;
@@ -3006,6 +3027,7 @@ error:
 inline static int handle_ser_child(struct process_table* p, int fd_i)
 {
 	struct tcp_connection* tcpconn;
+	struct tcp_connection* tmp;
 	long response[2];
 	int cmd;
 	int bytes;
@@ -3101,9 +3123,26 @@ inline static int handle_ser_child(struct process_table* p, int fd_i)
 			/* send the requested FD  */
 			/* WARNING: take care of setting refcnt properly to
 			 * avoid race conditions */
-			if (unlikely(send_fd(p->unix_sock, &tcpconn, sizeof(tcpconn),
-								tcpconn->s)<=0)){
-				LOG(L_ERR, "ERROR: handle_ser_child: send_fd failed\n");
+			if (unlikely(tcpconn->state == S_CONN_BAD ||
+						(tcpconn->flags & F_CONN_FD_CLOSED) ||
+						tcpconn->s ==-1)) {
+				/* connection is already marked as bad and/or has no
+				   fd => don't try to send the fd (trying to send a
+				   closed fd _will_ fail) */
+				tmp = 0;
+				if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
+					BUG("handle_ser_child: CONN_GET_FD: send_all failed\n");
+				/* no need to attempt to destroy the connection, it should
+				   be already in the process of being destroyed */
+			} else if (unlikely(send_fd(p->unix_sock, &tcpconn,
+										sizeof(tcpconn), tcpconn->s)<=0)){
+				LOG(L_ERR, "handle_ser_child: CONN_GET_FD:"
+							" send_fd failed\n");
+				/* try sending error (better then not sending anything) */
+				tmp = 0;
+				if (unlikely(send_all(p->unix_sock, &tmp, sizeof(tmp)) <= 0))
+					BUG("handle_ser_child: CONN_GET_FD:"
+							" send_fd send_all fallback failed\n");
 			}
 			break;
 		case CONN_NEW:




More information about the sr-dev mailing list