arlad segfaults : arla-0.33-pre2 on Linux 2.3.99-pre6

Assar Westerlund assar at stacken.kth.se
Sat May 6 14:55:44 CEST 2000


"Helge MEINHARD, CERN-EP" <Helge.Meinhard at cern.ch> writes:
> Then, ls /afs still works, but when I do ls /afs/cern.ch for
> example, then arlad aborts with a segmentation fault, leaving no
> core behind. [I tried attaching gdb, but when I do that even the
> mount of /afs does not work.]

It's probably not very important in this case, but the usual way of
handling this is to start arlad in gdb, preferably with `run -nz'.
Then you should be able to see what has happened when it crashes.

In this case, the problem is an oops, that was forced by the kernel,
and that's what causes arlad to segfault.

> May  5 23:10:07 pcch75 kernel: kernel BUG at dcache.c:664! 
> May  5 23:10:07 pcch75 kernel: invalid operand: 0000 
> May  5 23:10:07 pcch75 kernel: CPU:    1 
> May  5 23:10:07 pcch75 kernel: EIP:    0010:[d_lookup+125/248] 
> May  5 23:10:07 pcch75 kernel: EFLAGS: 00010286 
> May  5 23:10:07 pcch75 kernel: eax: 0000001c   ebx: c51e74e0   ecx: c02d9f84   edx: c02d9f84 
> May  5 23:10:07 pcch75 kernel: esi: c6080020   edi: c51e7518   ebp: c54b9698   esp: c6653eec 
> May  5 23:10:07 pcch75 kernel: ds: 0018   es: 0018   ss: 0018 
> May  5 23:10:07 pcch75 kernel: Process arlad (pid: 596, stackpage=c6653000) 
> May  5 23:10:07 pcch75 kernel: Stack: c0275d7d c0275f1c 00000298 c51e74e0 c8850e40 c51e7518 c7f38d80 c7fcb678  
> May  5 23:10:07 pcch75 kernel:        c6080020 6524c858 00000007 c88485d2 c51e74e0 c6653f44 c6080000 00000000  
> May  5 23:10:07 pcch75 kernel:        00000000 c66a0000 c4f88090 c6653f44 c6080020 00000000 c6080020 00000007  
> May  5 23:10:07 pcch75 kernel: Call Trace: [tvecs+32481/147012] [tvecs+32896/147012] [<c8850e40>] [<c88485d2>] [<c
> 884732c>] [<c8846982>] [sys_write+270/304]  
> May  5 23:10:07 pcch75 kernel:        [system_call+52/56]  
> May  5 23:10:08 pcch75 kernel: Code: 0f 0b 83 c4 0c eb 61 8b 6d 00 8b 74 24 18 39 73 48 75 55 8b  

This is the problem.  I think this is telling us that we are doing the
locking related to the dcache wrong.

Can you try applying the following patch?  Some more investigation
and testing is probably needed to figure out when and how this should be
done, but this is probably a start.

/assar



Index: xfs_dev.c
===================================================================
RCS file: /afs/stacken.kth.se/src/SourceRepository/arla/xfs/linux/xfs_dev.c,v
retrieving revision 1.67
diff -u -w -u -w -r1.67 xfs_dev.c
--- xfs_dev.c	2000/04/30 20:46:50	1.67
+++ xfs_dev.c	2000/05/06 12:54:25
@@ -201,16 +201,22 @@
 /*
  * Only allow one open.
  */
-static int xfs_devopen(struct inode *inode, struct file *file)
+static int
+xfs_devopen(struct inode *inode, struct file *file)
 {
     struct xfs_channel *chan;
     struct xfs *xfsp = &xfs[MINOR(inode->i_rdev)];
+    int ret = 0;
     
+    lock_kernel();
+
     XFSDEB(XDEBDEV, ("xfs_devopen dev = %d, flags = %d\n",
 		     inode->i_rdev, file->f_flags));
     
-    if (MINOR(inode->i_rdev) >=NXFS)
-	return -ENXIO;
+    if (MINOR(inode->i_rdev) >=NXFS) {
+	ret = -ENXIO;
+	goto out;
+    }
 
     MOD_INC_USE_COUNT;
     
@@ -219,13 +225,15 @@
     /* Only allow one reader/writer */
     if (chan->status & CHANNEL_OPENED) {
 	MOD_DEC_USE_COUNT;
-	return -EBUSY;
+	ret = -EBUSY;
+	goto out;
     }
 
     chan->message_buffer = xfs_alloc(MAX_XMSG_SIZE, XFS_MEM_MSGBUF);
     if (chan->message_buffer == NULL) {
 	MOD_DEC_USE_COUNT;
-	return -ENOMEM;
+	ret = -ENOMEM;
+	goto out;
     }
     
     chan->status |= CHANNEL_OPENED;
@@ -233,7 +241,9 @@
 
     INIT_LIST_HEAD(&chan->inactive_list);
 
-    return 0;
+ out:
+    unlock_kernel();
+    return ret;
 }
 
 static int
@@ -242,6 +252,8 @@
     struct xfs_channel *chan = &xfs_channel[MINOR(inode->i_rdev)];
     struct xfs_link *first;
 
+    lock_kernel ();
+
     XFSDEB(XDEBDEV, ("xfs_devclose dev = %d, flags = %d\n",
 		     inode->i_rdev, file->f_flags));
     
@@ -289,6 +301,7 @@
     free_all_xfs_nodes(&xfs[MINOR(inode->i_rdev)]);
     
     MOD_DEC_USE_COUNT;
+    unlock_kernel ();
     return 0;
 }
 
@@ -304,6 +317,8 @@
     struct xfs_link *first;
     int ret = 0;
     
+    lock_kernel ();
+
     XFSDEB(XDEBDEV, ("xfs_devread: m = %p, m->prev = %p, m->next = %p\n",
 		     &chan->messageq, chan->messageq.prev,
 		     chan->messageq.next));
@@ -335,6 +350,7 @@
     
     *ppos += ret;
     
+    unlock_kernel ();
     return ret;
 }
 
@@ -352,6 +368,8 @@
     struct xfs_message_header *msg_buf;
     int ret = 0;
     
+    lock_kernel ();
+
     XFSDEB(XDEBDEV, ("xfs_devwrite\n"));
     
     if (count > MAX_XMSG_SIZE)
@@ -387,6 +405,7 @@
     if (ret >= 0)
 	*ppos += ret;
     
+    unlock_kernel ();
     return ret;
 }
 
@@ -407,7 +426,10 @@
 {
     kdev_t dev = file->f_dentry->d_inode->i_rdev;
     struct xfs_channel *chan = &xfs_channel[MINOR(dev)];
+    int ret = 0;
     
+    lock_kernel ();
+
     poll_wait(file, &chan->wait_queue, wait);
     
     xfs_process_inactive_queue (chan);
@@ -415,10 +437,12 @@
     down(&chan->channel_sem);
     if (!xfs_emptyq(&chan->messageq)) {
 	up(&chan->channel_sem);
-	return POLLIN;                    /* Something to read */
-    }
+	ret = POLLIN;
+    } else {
     up(&chan->channel_sem);
-    return 0;
+    }
+    unlock_kernel ();
+    return ret;
 }
 
 /*





More information about the Arla-drinkers mailing list