two bugs and a patch
Love
lha at stacken.kth.se
Sat Jan 6 23:22:34 CET 2001
Nickolai Zeldovich <kolya at mit.edu> writes:
> It looks like rxi_DecongestionEvent decrements the refcount on its
> rx_peer too early, so it can potentially be recycled while it is
> still running (arlad crashed on me once earlier today, and rx_peer
> was filled with 0xAA, which my osi_Free memsets all freed memory to.)
> Attached below is a patch which should fix this.
That might be true if rxi_Start() blocks, I've applied the patch since it
seems more right. It seems to be that it might be a problem if
rxi_DecongestionEvent() can run concurrently for same peer, that might
cause problems. I just can't figure out if its happening.
> Should there be some per-entry locking on rx_connHashTable, and maybe
> some other hash tables in arla as well? It looks like LWP is preemptive
> so something of this sort would be required, unless I'm confused.
LWP isn't preemptive, its cooperative. Locking is needed when a thread can
block in select or yield. In case you have multi-homed fileservers, the
patch attached might help you.
Love
Version: 2
When changing the peer, update all connections using the old peer.
Index: rx.c
===================================================================
RCS file: /afs/stacken.kth.se/src/SourceRepository/arla/rx/rx.c,v
retrieving revision 1.17
diff -u -w -r1.17 rx/rx.c
--- rx/rx.c 2001/01/05 20:42:10 1.17
+++ rx/rx.c 2001/01/06 22:03:04
@@ -306,6 +306,7 @@
conn->cid = cid;
conn->epoch = rx_epoch;
conn->peer = rxi_FindPeer(shost, sport);
+ queue_Append(&conn->peer->connQueue, conn);
conn->serviceId = sservice;
conn->securityObject = securityObject;
conn->securityData = (void *) 0;
@@ -423,6 +424,7 @@
/* Make sure that the connection is completely reset before deleting it. */
rxi_ResetConnection(conn);
+ queue_Remove(conn);
if (--conn->peer->refCount == 0)
conn->peer->idleWhen = clock_Sec();
@@ -1262,6 +1264,7 @@
rxi_DestroyPeer(struct rx_peer * peer)
{
rxi_RemovePeer(peer);
+ assert(queue_IsEmpty(&peer->connQueue));
rxi_FreePeer(peer);
rx_stats.nPeerStructs--;
}
@@ -1282,9 +1285,19 @@
break;
}
if (pp != NULL) {
+ struct rx_connection *conn, *next;
+
pp->refCount += peer->refCount;
pp->nSent += peer->nSent;
pp->reSends += peer->reSends;
+
+ for (queue_Scan(&peer->connQueue, conn, next, rx_connection)) {
+ conn->peer = pp;
+ queue_Remove(conn);
+ queue_Append(&pp->connQueue, conn);
+ }
+
+ assert(queue_IsEmpty(&peer->connQueue));
rxi_FreePeer(peer);
rx_stats.nPeerStructs--;
return pp;
@@ -1361,6 +1374,7 @@
conn->next = rx_connHashTable[hashindex];
rx_connHashTable[hashindex] = conn;
conn->peer = rxi_FindPeer(host, port);
+ queue_Append(&conn->peer->connQueue, conn);
conn->maxPacketSize = MIN(conn->peer->packetSize, OLD_MAX_PACKET_SIZE);
conn->type = RX_SERVER_CONNECTION;
conn->lastSendTime = clock_Sec(); /* don't GC immediately */
Index: rx.h
===================================================================
RCS file: /afs/stacken.kth.se/src/SourceRepository/arla/rx/rx.h,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -w -u -w -r1.18 -r1.19
--- rx/rx.h 2000/11/26 19:42:14 1.18
+++ rx/rx.h 2001/01/06 21:54:20 1.19
@@ -422,6 +422,7 @@
*/
struct rx_peer {
struct rx_peer *next; /* Next in hash conflict or free list */
+ struct rx_queue connQueue; /* a list of all conn use this peer */
u_long host; /* Remote IP address, in net byte
* order */
u_short port; /* Remote UDP port, in net byte order */
@@ -489,6 +490,7 @@
* limited multiple asynchronous conversations.
*/
struct rx_connection {
+ struct rx_queue queue_item; /* conns on same peer */
struct rx_connection *next; /* on hash chain _or_ free list */
struct rx_peer *peer;
#ifdef RX_ENABLE_LOCKS
More information about the Arla-drinkers
mailing list