two bugs and a patch

Love lha at stacken.kth.se
Sun Jan 7 17:54:29 CET 2001


Nickolai Zeldovich <kolya at mit.edu> writes:

> FWIW, it seems like select() can potentially be called when in the
> rxi_ReapConnections loop (rxi_ReapConnections calls rxi_AckAll which
> can potentially send to the network, calling osi_NetSend).

Here is another patch for you to try:



Version: 1

If the call is freed, retry the whole hash-bucket chain.

Index: rx.c
===================================================================
RCS file: /afs/stacken.kth.se/src/SourceRepository/arla/rx/rx.c,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -w -u -w -r1.19 -r1.20
--- rx.c	2001/01/06 23:04:51	1.19
+++ rx.c	2001/01/07 16:42:43	1.20
@@ -3239,9 +3243,10 @@
  * Check if a call needs to be destroyed.  Called by keep-alive code to ensure
  * that things are fine.  Also called periodically to guarantee that nothing
  * falls through the cracks (e.g. (error + dally) connections have keepalive
- * turned off.  Returns 0 if conn is well, -1 otherwise.  If otherwise, call
- * may be freed! 
+ * turned off.  Returns 0 if conn is well, negative otherwise.
+ * -1 means that the call still exists, -2 means that the call is freed.
  */
+
 static int 
 rxi_CheckCall(struct rx_call *call)
 {
@@ -3257,17 +3262,21 @@
      * seconds.
      */
     if (now > (call->lastReceiveTime + conn->secondsUntilDead)) {
-	if (call->state == RX_STATE_ACTIVE)
+
+	if (call->state == RX_STATE_ACTIVE) {
 	    rxi_CallError(call, RX_CALL_DEAD);
-	else
+	    return -1;
+	} else {
 	    rxi_FreeCall(call);
+	    return -2;
+	}
 
 	/*
 	 * Non-active calls are destroyed if they are not responding to
 	 * pings; active calls are simply flagged in error, so the attached
 	 * process can die reasonably gracefully.
 	 */
-	return -1;
+	
     }
     /* see if we have a non-activity timeout */
     tservice = conn->service;
@@ -3591,13 +3600,15 @@
      */
     {
 	struct rx_connection **conn_ptr, **conn_end;
-	int i, havecalls = 0;
+	int i, havecalls = 0, ret;
 
 	for (conn_ptr = &rx_connHashTable[0],
 	     conn_end = &rx_connHashTable[rx_hashTableSize];
-	     conn_ptr < conn_end; conn_ptr++) {
+	     conn_ptr < conn_end; 
+	     conn_ptr++) {
 	    struct rx_connection *conn, *next;
 
+	rereap:
 	    for (conn = *conn_ptr; conn; conn = next) {
 		next = conn->next;
 		/* once a minute look at everything to see what's up */
@@ -3605,7 +3616,14 @@
 		for (i = 0; i < RX_MAXCALLS; i++) {
 		    if (conn->call[i]) {
 			havecalls = 1;
-			rxi_CheckCall(conn->call[i]);
+			ret = rxi_CheckCall(conn->call[i]);
+			if (ret == -2) {
+			    /* If CheckCall freed the call, it might
+			     * have destroyed  the connection as well,
+			     * which screws up the linked lists.
+			     */
+			    goto rereap;
+			}
 		    }
 		}
 		if (conn->type == RX_SERVER_CONNECTION) {



Love





More information about the Arla-drinkers mailing list