Thread: [SSI-devel] SUSE: onnode problem
From: Bharata B R. <bha...@hp...> - 2004-11-30 08:31:23

On a 2 node SuSE cluster, I am facing a problem with onnode. When I run any
command on node 2 from node 1 using onnode, onnode fails due to failure of
the rexec syscall (strace output given at the end).

# onnode 2 /bin/ls
can't execute /bin/ls, errno=116. (ESTALE)

# onnode 2 ls
ls not found.

The cluster is running a pre-built openssi FC2 kernel with the openssi-tools
and cluster-tools rpms installed (no other openssi rpms present). Both nodes
have been transitioned into the UP state manually by the clusternode_setstate
command.

Any idea what might be happening here?

Regards,
Bharata.

cluster -V looks like this:

Node 1:
State: UP
Previous state: COMINGUP
Reason for last transition: API
Last transition ID: 2
Last transition time: Sat Nov 27 01:02:14.173936 2004
First transition ID: 1
First transition time: Fri Nov 26 07:48:18.150000 2004
Number of CPUs: 4
Number of CPUs online: 4

Node 2:
State: UP
Previous state: COMINGUP
Reason for last transition: API
Last transition ID: 4
Last transition time: Sat Nov 27 01:09:54.543936 2004
First transition ID: 3
First transition time: Sat Nov 27 01:07:23.403936 2004
Number of CPUs: 1
Number of CPUs online: 1

/proc/mounts is like this:

rootfs / rootfs rw 0 0
/dev/root /initrd ext2 rw 0 0
/dev/root / cfs rw 0 0
none /cluster/node1/dev cfs rw 0 0
none /dev cfs rw 0 0
proc /proc proc rw 0 0
devpts /cluster/dev/pts devpts rw 0 0
/dev/cciss/c0d0p5 /boot ext3 rw 0 0
/dev/cciss/c0d0p9 /home ext3 rw 0 0
/dev/cciss/c0d0p8 /usr ext3 rw 0 0
tmpfs /dev/shm tmpfs rw 0 0
usbdevfs /proc/bus/usb usbdevfs rw 0 0
00000037 /cluster/node2/dev cfs rw 0 0
00000037 /dev cfs rw 0 0

strace of 'onnode 2 /bin/ls':

execve("/bin/onnode", ["onnode", "2", "/bin/ls"], [/* 54 vars */]) = 0
uname({sys="Linux", node="pushya4", ...}) = 0
brk(0) = 0x863b000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3ff07000
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
open("//lib/tls/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/i686/mmx", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/tls/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/i686", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/tls/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/mmx", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/tls/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/i686/mmx", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/i686", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
open("//lib/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/mmx", 0xbfe26910) = -1 ENOENT (No such file or directory)
open("//lib/libcluster.so.0", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\\\26\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=16940, ...}) = 0
old_mmap(NULL, 19876, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x5cb000
old_mmap(0x5cf000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x3000) = 0x5cf000
close(3) = 0
open("//lib/i686/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320]\1"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1461208, ...}) = 0
old_mmap(NULL, 1256644, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb21000
old_mmap(0xc4d000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x12c000) = 0xc4d000
old_mmap(0xc52000, 7364, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xc52000
close(3) = 0
SYS_290(0xbfe27270, 0x14, 0x804a66c, 0x5cc724, 0x5cfcb0) = 125
SYS_290(0xbfe27200, 0x14, 0x3ff07c60, 0xb2910c, 0x5cfcb0) = 0
SYS_292(0xbff08796, 0xbfe2731c, 0xbfe27324, 0x2, 0xe87290) = -1 ESTALE (Stale NFS file handle)
write(2, " can\'t execute /bin/ls, errno=11"..., 35 can't execute /bin/ls, errno=116.
) = 35
exit_group(-1) = ?
From: Brian J. W. <Bri...@hp...> - 2004-11-30 23:15:55

Bharata B Rao wrote:
> On a 2 node SuSE cluster, I am facing a problem with onnode.
>
> When I run any command on node 2 from node 1 using onnode, onnode fails
> due to failure of the rexec syscall (strace output given at the end)
>
> # onnode 2 /bin/ls
> can't execute /bin/ls, errno=116. (ESTALE)
>
> # onnode 2 ls
> ls not found.
>
> The cluster is running a pre-built openssi FC2 kernel with openssi-tools
> and cluster-tools rpms installed (no other openssi rpms present)
>
> Both the nodes have been transitioned into the UP state manually by the
> clusternode_setstate command.
>
> Any idea what might be happening here?

Are there any interesting console messages on node 2? How about error
messages in /cluster/node2/var/log/messages? The failure's probably
happening on node 2, and there should be some error message to indicate
where in the kernel code it's failing.

> SYS_290(0xbfe27270, 0x14, 0x804a66c, 0x5cc724, 0x5cfcb0) = 125
> SYS_290(0xbfe27200, 0x14, 0x3ff07c60, 0xb2910c, 0x5cfcb0) = 0
> SYS_292(0xbff08796, 0xbfe2731c, 0xbfe27324, 0x2, 0xe87290) = -1 ESTALE
> (Stale NFS file handle)

If you install the SSI-enhanced version of strace, these system calls
will be more descriptive. The SYS_292() is the rexecve() call that's
failing.

Brian
From: Bharata B R. <bha...@hp...> - 2004-12-01 13:30:15

On Wed, 2004-12-01 at 04:45, Brian J. Watson wrote:
> Bharata B Rao wrote:
>> On a 2 node SuSE cluster, I am facing a problem with onnode.
>>
>> When I run any command on node 2 from node 1 using onnode, onnode fails
>> due to failure of the rexec syscall (strace output given at the end)
>>
>> # onnode 2 /bin/ls
>> can't execute /bin/ls, errno=116. (ESTALE)
>>
>> # onnode 2 ls
>> ls not found.
>>
>> The cluster is running a pre-built openssi FC2 kernel with openssi-tools
>> and cluster-tools rpms installed (no other openssi rpms present)
>>
>> Both the nodes have been transitioned into the UP state manually by the
>> clusternode_setstate command.
>>
>> Any idea what might be happening here?
>
> Are there any interesting console messages on node 2? How about error
> messages in /cluster/node2/var/log/messages? The failure's probably
> happening on node 2, and there should be some error message to indicate
> where in the kernel code it's failing.

This is the message observed on node 2's console:

reop_import_path: no such path: /cluster/dev/pts/2

However, I do have this:

# ls -l /cluster/dev/pts/2
crw--w---- 1 root tty 136, 2 Dec 2 08:23 /cluster/dev/pts/2

And /cluster/node2/var/log/messages is not yet created, since syslogd has
not yet run on node 2, I assume.

>> SYS_290(0xbfe27270, 0x14, 0x804a66c, 0x5cc724, 0x5cfcb0) = 125
>> SYS_290(0xbfe27200, 0x14, 0x3ff07c60, 0xb2910c, 0x5cfcb0) = 0
>> SYS_292(0xbff08796, 0xbfe2731c, 0xbfe27324, 0x2, 0xe87290) = -1 ESTALE
>> (Stale NFS file handle)
>
> If you install the SSI-enhanced version of strace, these system calls
> will be more descriptive. The SYS_292() is the rexecve() call that's
> failing.

Here's the strace output with the openssi strace (onnode 2 /bin/ls):

execve("/bin/onnode", ["onnode", "2", "/bin/ls"], [/* 54 vars */]) = 0
uname({sys="Linux", node="pushya4", ...}) = 0
brk(0) = 0xa028000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3ff95000
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
open("//lib/tls/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/i686/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/tls/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/i686", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/tls/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/tls/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/tls", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/i686/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/i686", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
open("//lib/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
stat64("//lib/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
open("//lib/libcluster.so.0", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\\\26\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=16940, ...}) = 0
old_mmap(NULL, 19876, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x883000
old_mmap(0x887000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x3000) = 0x887000
close(3) = 0
open("//lib/i686/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320]\1"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1461208, ...}) = 0
old_mmap(NULL, 1256644, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x408000
old_mmap(0x534000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x12c000) = 0x534000
old_mmap(0x539000, 7364, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x539000
close(3) = 0
ssisys({id=SSISYS_CLUSTER_MAXNODES:4}, 20) = 125
ssisys({id=SSISYS_SET_NODE_CONTEXT:4, node=2}, 20) = 0
rexecve("/bin/ls", ["/bin/ls"], [/* 54 vars */], 2) = -1 ESTALE (Stale NFS file handle)
write(2, " can\'t execute /bin/ls, errno=11"..., 35 can't execute /bin/ls, errno=116.
) = 35
exit_group(-1) = ?
From: John B. <joh...@hp...> - 2004-12-01 18:22:30

Bharata B Rao wrote:
> On Wed, 2004-12-01 at 04:45, Brian J. Watson wrote:
>
>> Bharata B Rao wrote:
>>
>>> On a 2 node SuSE cluster, I am facing a problem with onnode.
>>>
>>> When I run any command on node 2 from node 1 using onnode, onnode fails
>>> due to failure of the rexec syscall (strace output given at the end)
>>>
>>> # onnode 2 /bin/ls
>>> can't execute /bin/ls, errno=116. (ESTALE)
>>>
>>> # onnode 2 ls
>>> ls not found.
>>>
>>> The cluster is running a pre-built openssi FC2 kernel with openssi-tools
>>> and cluster-tools rpms installed (no other openssi rpms present)
>>>
>>> Both the nodes have been transitioned into the UP state manually by the
>>> clusternode_setstate command.
>>>
>>> Any idea what might be happening here?
>>
>> Are there any interesting console messages on node 2? How about error
>> messages in /cluster/node2/var/log/messages? The failure's probably
>> happening on node 2, and there should be some error message to indicate
>> where in the kernel code it's failing.
>
> This is the message observed on node 2's console:
>
> reop_import_path: no such path: /cluster/dev/pts/2
>
> However, I do have this:
>
> # ls -l /cluster/dev/pts/2
> crw--w---- 1 root tty 136, 2 Dec 2 08:23 /cluster/dev/pts/2
>
> And /cluster/node2/var/log/messages is not yet created, since syslogd
> has not yet run on node 2, I assume.

The ESTALE is probably due to the "reop_import_path" error message. If
you log into the console and try "onnode 2 /bin/ls" does it work any
better? If it does, then something about the distributed /dev is probably
the problem. If you run the base kernel, does the /cluster/dev/pts
directory exist? Do the /cluster/node1/dev and the /cluster/node2/dev
directories exist? Is devfsd running on each node?

John

>>> SYS_290(0xbfe27270, 0x14, 0x804a66c, 0x5cc724, 0x5cfcb0) = 125
>>> SYS_290(0xbfe27200, 0x14, 0x3ff07c60, 0xb2910c, 0x5cfcb0) = 0
>>> SYS_292(0xbff08796, 0xbfe2731c, 0xbfe27324, 0x2, 0xe87290) = -1 ESTALE
>>> (Stale NFS file handle)
>>
>> If you install the SSI-enhanced version of strace, these system calls
>> will be more descriptive. The SYS_292() is the rexecve() call that's
>> failing.
>
> Here's the strace output with the openssi strace (onnode 2 /bin/ls):
>
> execve("/bin/onnode", ["onnode", "2", "/bin/ls"], [/* 54 vars */]) = 0
> uname({sys="Linux", node="pushya4", ...}) = 0
> brk(0) = 0xa028000
> old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x3ff95000
> open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
> open("//lib/tls/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/tls/i686/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/tls/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/tls/i686", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/tls/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/tls/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/tls/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/tls", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/i686/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/i686/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/i686/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/i686", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
> open("//lib/mmx/libcluster.so.0", O_RDONLY) = -1 ENOENT (No such file or directory)
> stat64("//lib/mmx", 0xbfee02c0) = -1 ENOENT (No such file or directory)
> open("//lib/libcluster.so.0", O_RDONLY) = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\\\26\0"..., 512) = 512
> fstat64(3, {st_mode=S_IFREG|0755, st_size=16940, ...}) = 0
> old_mmap(NULL, 19876, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x883000
> old_mmap(0x887000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x3000) = 0x887000
> close(3) = 0
> open("//lib/i686/libc.so.6", O_RDONLY) = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320]\1"..., 512) = 512
> fstat64(3, {st_mode=S_IFREG|0755, st_size=1461208, ...}) = 0
> old_mmap(NULL, 1256644, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x408000
> old_mmap(0x534000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x12c000) = 0x534000
> old_mmap(0x539000, 7364, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x539000
> close(3) = 0
> ssisys({id=SSISYS_CLUSTER_MAXNODES:4}, 20) = 125
> ssisys({id=SSISYS_SET_NODE_CONTEXT:4, node=2}, 20) = 0
> rexecve("/bin/ls", ["/bin/ls"], [/* 54 vars */], 2) = -1 ESTALE (Stale NFS file handle)
> write(2, " can\'t execute /bin/ls, errno=11"..., 35 can't execute /bin/ls, errno=116.
> ) = 35
> exit_group(-1) = ?
>
>> Brian
>
> _______________________________________________
> ssic-linux-devel mailing list
> ssi...@li...
> https://lists.sourceforge.net/lists/listinfo/ssic-linux-devel
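The checks John asks about can be scripted so they are easy to repeat on each node. A small sketch (the paths are the ones from this thread; printing yes/no for each path is our own convention, so the script is safe to run on any machine):

```shell
#!/bin/sh
# Answer John's questions on the node being debugged:
# do the shared /dev directories exist, and is devfsd running?
for d in /cluster/dev/pts /cluster/node1/dev /cluster/node2/dev; do
    if [ -d "$d" ]; then
        echo "$d: yes"
    else
        echo "$d: no"
    fi
done

# The [d] trick keeps the grep from matching its own command line.
if ps ax | grep -q '[d]evfsd'; then
    echo "devfsd: running"
else
    echo "devfsd: not running"
fi
```

On a healthy SSI node all three directories should report "yes" and devfsd should report "running".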
From: Bharata B R. <bha...@hp...> - 2004-12-03 10:06:25

On Wed, 2004-12-01 at 23:52, John Byrne wrote:
> Bharata B Rao wrote:
>
>> This is the message observed on node 2's console:
>>
>> reop_import_path: no such path: /cluster/dev/pts/2
>>
>> However, I do have this:
>>
>> # ls -l /cluster/dev/pts/2
>> crw--w---- 1 root tty 136, 2 Dec 2 08:23 /cluster/dev/pts/2
>>
>> And /cluster/node2/var/log/messages is not yet created, since syslogd
>> has not yet run on node 2, I assume.
>
> The ESTALE is probably due to the "reop_import_path" error message. If
> you log into the console and try "onnode 2 /bin/ls" does it work any
> better? If it does, then something about the distributed /dev is
> probably the problem. If you run the base kernel, does the
> /cluster/dev/pts directory exist? Do the /cluster/node1/dev and the
> /cluster/node2/dev directories exist? Is devfsd running on each node?

In the present setup, I am running 'onnode 2 /bin/ls' after logging into
node 1's console. I still don't have a console on node 2.

With the base kernel, the /cluster/dev/pts, /cluster/node1/dev and
/cluster/node2/dev directories do exist, and they are empty.

With the ssi kernel, I don't understand why "reop_import_path" should
complain about the non-existence of /cluster/dev/pts (sometimes
/cluster/dev/pts/0 and /cluster/dev/pts/1).

devfsd is running only on node 1. On node 2 only init has run and no
services have started. (running unmodified init still)

# ls -l /cluster/node1/dev/pts
lr-xr-xr-x 1 root root 16 Jan 1 1970 /cluster/node1/dev/pts -> /cluster/dev/pts

# ls -l /cluster/node2/dev/pts
lr-xr-xr-x 1 root root 16 Jan 1 1970 /cluster/node2/dev/pts -> /cluster/dev/pts

# ls -l /dev/pts
lr-xr-xr-x 1 root root 16 Jan 1 1970 /dev/pts -> /cluster/dev/pts

# ls -l /cluster/dev/pts/
total 4
drwxr-xr-x 2 root root 0 Dec 4 04:48 .
drwxr-xr-x 3 root root 4096 Dec 3 08:59 ..
crw--w---- 1 root tty 136, 0 Dec 4 05:00 0
crw--w---- 1 bharata tty 136, 1 Dec 4 04:49 1

Regards,
Bharata.
From: John B. <joh...@hp...> - 2004-12-03 19:40:53

Bharata B Rao wrote:
> On Wed, 2004-12-01 at 23:52, John Byrne wrote:
>
>> Bharata B Rao wrote:
>>
>>> This is the message observed on node 2's console:
>>>
>>> reop_import_path: no such path: /cluster/dev/pts/2
>>>
>>> However, I do have this:
>>>
>>> # ls -l /cluster/dev/pts/2
>>> crw--w---- 1 root tty 136, 2 Dec 2 08:23 /cluster/dev/pts/2
>>>
>>> And /cluster/node2/var/log/messages is not yet created, since syslogd
>>> has not yet run on node 2, I assume.
>>
>> The ESTALE is probably due to the "reop_import_path" error message. If
>> you log into the console and try "onnode 2 /bin/ls" does it work any
>> better? If it does, then something about the distributed /dev is
>> probably the problem. If you run the base kernel, does the
>> /cluster/dev/pts directory exist? Do the /cluster/node1/dev and the
>> /cluster/node2/dev directories exist? Is devfsd running on each node?
>
> In the present setup, I am running 'onnode 2 /bin/ls' after logging into
> node 1's console. I still don't have a console on node 2.
>
> With the base kernel, the /cluster/dev/pts, /cluster/node1/dev and
> /cluster/node2/dev directories do exist, and they are empty.
>
> With the ssi kernel, I don't understand why "reop_import_path" should
> complain about the non-existence of /cluster/dev/pts (sometimes
> /cluster/dev/pts/0 and /cluster/dev/pts/1).
>
> devfsd is running only on node 1. On node 2 only init has run and no
> services have started. (running unmodified init still)

devfsd has to run on every node. Try this and see if it fixes the problem.

> # ls -l /cluster/node1/dev/pts
> lr-xr-xr-x 1 root root 16 Jan 1 1970 /cluster/node1/dev/pts -> /cluster/dev/pts
>
> # ls -l /cluster/node2/dev/pts
> lr-xr-xr-x 1 root root 16 Jan 1 1970 /cluster/node2/dev/pts -> /cluster/dev/pts
>
> # ls -l /dev/pts
> lr-xr-xr-x 1 root root 16 Jan 1 1970 /dev/pts -> /cluster/dev/pts
>
> # ls -l /cluster/dev/pts/
> total 4
> drwxr-xr-x 2 root root 0 Dec 4 04:48 .
> drwxr-xr-x 3 root root 4096 Dec 3 08:59 ..
> crw--w---- 1 root tty 136, 0 Dec 4 05:00 0
> crw--w---- 1 bharata tty 136, 1 Dec 4 04:49 1
>
> Regards,
> Bharata.
From: Bharata B R. <bha...@hp...> - 2004-12-07 14:29:13

On Sat, 2004-12-04 at 01:10, John Byrne wrote:
> Bharata B Rao wrote:
>
>> devfsd is running only on node 1. On node 2 only init has run and no
>> services have started. (running unmodified init still)
>
> devfsd has to run on every node. Try this and see if it fixes the problem.

First, since node 2 is running an unmodified init and the initscripts have
not been modified yet, no services get started on the 2nd node.

Hence, when I tried starting devfsd from linuxrc (of the initrd), I ran
into other problems:

- When the kernel boots on the 2nd node, it doesn't mount devfs by
default. It needs a command line argument (devfs=mount), which I am now
supplying through the /tftpboot/pxelinux.cfg/default file. (But for the
kernel on the initnode, devfs gets mounted even without the devfs=mount
option. Why?)

- With this extra option, the kernel on the 2nd node started creating a
.devfsd entry in its /dev:

# ls -l /dev/.devfsd
crw------- 1 root root 8,0 Dec 8 04:04 /dev/.devfsd

According to the devfs FAQ, the presence of .devfsd indicates that devfs
has been mounted, so I conclude that devfs is getting mounted on the 2nd
node.

- But when trying to start the devfsd daemon, this is the message I
observe:

# /sbin/devfsd /dev
modprobe: modprobe: Can't locate module char-major-8
Error opening file: ".devfsd" No such device

Not sure why there is a search for char-major-8 here, given that devfs is
in the kernel. And not sure why .devfsd can't be found.

Any hints on what might be wrong here?

Regards,
Bharata.
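For readers unfamiliar with PXE booting: the devfs=mount argument described above would be passed on the kernel command line via the PXE config. A hypothetical /tftpboot/pxelinux.cfg/default entry might look like the following (the label and the kernel/initrd file names are made-up placeholders; only the devfs=mount option comes from this thread):

```
default openssi
label openssi
    kernel vmlinuz-openssi
    append initrd=initrd.img devfs=mount
```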
From: John B. <joh...@hp...> - 2004-12-07 19:55:33

Bharata B Rao wrote:
> On Sat, 2004-12-04 at 01:10, John Byrne wrote:
>
>> Bharata B Rao wrote:
>>
>>> devfsd is running only on node 1. On node 2 only init has run and no
>>> services have started. (running unmodified init still)
>>
>> devfsd has to run on every node. Try this and see if it fixes the problem.
>
> First, since node 2 is running an unmodified init and the initscripts
> have not been modified yet, no services get started on the 2nd node.
>
> Hence, when I tried starting devfsd from linuxrc (of the initrd), I ran
> into other problems:
>
> - When the kernel boots on the 2nd node, it doesn't mount devfs by
> default. It needs a command line argument (devfs=mount), which I am now
> supplying through the /tftpboot/pxelinux.cfg/default file. (But for the
> kernel on the initnode, devfs gets mounted even without the devfs=mount
> option. Why?)
>
> - With this extra option, the kernel on the 2nd node started creating a
> .devfsd entry in its /dev:
>
> # ls -l /dev/.devfsd
> crw------- 1 root root 8,0 Dec 8 04:04 /dev/.devfsd
>
> According to the devfs FAQ, the presence of .devfsd indicates that devfs
> has been mounted, so I conclude that devfs is getting mounted on the 2nd
> node.
>
> - But when trying to start the devfsd daemon, this is the message I
> observe:
>
> # /sbin/devfsd /dev
> modprobe: modprobe: Can't locate module char-major-8
> Error opening file: ".devfsd" No such device
>
> Not sure why there is a search for char-major-8 here, given that devfs
> is in the kernel. And not sure why .devfsd can't be found.
>
> Any hints on what might be wrong here?

The fact that you need the devfs=mount option suggests that you don't
have your initrd right. Devfs is supposed to get automatically mounted
in the kernel by ssi_mount_devfs(), called from initproc_postroot_init(),
which will be called by executing "cluster_config --initproc" from the
initrd.

I haven't been following what Bruce has told you to do with the initrd,
but maybe you need to add this.

John

> Regards,
> Bharata.
From: Bharata B R. <bha...@hp...> - 2004-12-08 13:29:20

On Wed, 2004-12-08 at 01:25, John Byrne wrote:
> Bharata B Rao wrote:
>> On Sat, 2004-12-04 at 01:10, John Byrne wrote:
>>
>>> Bharata B Rao wrote:
>>>
>>>> devfsd is running only on node 1. On node 2 only init has run and no
>>>> services have started. (running unmodified init still)
>>>
>>> devfsd has to run on every node. Try this and see if it fixes the problem.
>>
>> First, since node 2 is running an unmodified init and the initscripts
>> have not been modified yet, no services get started on the 2nd node.
>>
>> Hence, when I tried starting devfsd from linuxrc (of the initrd), I ran
>> into other problems:
>>
>> - When the kernel boots on the 2nd node, it doesn't mount devfs by
>> default. It needs a command line argument (devfs=mount), which I am now
>> supplying through the /tftpboot/pxelinux.cfg/default file. (But for the
>> kernel on the initnode, devfs gets mounted even without the devfs=mount
>> option. Why?)
>>
>> - With this extra option, the kernel on the 2nd node started creating a
>> .devfsd entry in its /dev:
>>
>> # ls -l /dev/.devfsd
>> crw------- 1 root root 8,0 Dec 8 04:04 /dev/.devfsd
>>
>> According to the devfs FAQ, the presence of .devfsd indicates that
>> devfs has been mounted, so I conclude that devfs is getting mounted on
>> the 2nd node.
>>
>> - But when trying to start the devfsd daemon, this is the message I
>> observe:
>>
>> # /sbin/devfsd /dev
>> modprobe: modprobe: Can't locate module char-major-8
>> Error opening file: ".devfsd" No such device
>>
>> Not sure why there is a search for char-major-8 here, given that devfs
>> is in the kernel. And not sure why .devfsd can't be found.
>>
>> Any hints on what might be wrong here?
>
> The fact that you need the devfs=mount option suggests that you don't
> have your initrd right. Devfs is supposed to get automatically mounted
> in the kernel by ssi_mount_devfs(), called from initproc_postroot_init(),
> which will be called by executing "cluster_config --initproc" from the
> initrd.

Ok, that explains, I believe, why I wasn't able to start devfsd. Now it
is clear that devfs gets mounted only during the initproc option of
cluster_config; I was always trying to start devfs before exec'ing
cluster_config --initproc.

With the linuxrc version attached below (linuxrc1), things won't proceed
much, because we didn't have an ssi-modified sysvinit till now. I will
report progress when I try again with the ssi-modified sysvinit.

I tried to get things moving further with unmodified sysvinit by making
some changes to linuxrc (attached below as linuxrc2). Here everything is
done manually, including cluster_config --initproc, starting of devfs and
mounting of /proc.

However, until devpts is mounted from node 2, onnode won't work. But
manual mounting of devpts from linuxrc panics the system somewhere in the
mount code. Need to investigate this further.

Regards,
Bharata.

> I haven't been following what Bruce has told you to do with the initrd,
> but maybe you need to add this.
>
> John
From: Brian J. W. <Bri...@hp...> - 2004-12-08 22:49:11

Bharata B Rao wrote:
> Ok, that explains, I believe, why I wasn't able to start devfsd. Now it
> is clear that devfs gets mounted only during the initproc option of
> cluster_config; I was always trying to start devfs before exec'ing
> cluster_config --initproc.
>
> With the linuxrc version attached below (linuxrc1), things won't proceed
> much, because we didn't have an ssi-modified sysvinit till now. I will
> report progress when I try again with the ssi-modified sysvinit.
>
> I tried to get things moving further with unmodified sysvinit by making
> some changes to linuxrc (attached below as linuxrc2). Here everything is
> done manually, including cluster_config --initproc, starting of devfs
> and mounting of /proc.
>
> However, until devpts is mounted from node 2, onnode won't work. But
> manual mounting of devpts from linuxrc panics the system somewhere in
> the mount code. Need to investigate this further.

Why aren't you running 'cluster_config --initproc'? It should only try to
start /sbin/init on the first node, so it shouldn't be a problem. You
don't need an init running on node 2, but running 'cluster_config
--initproc' should mount devfs and do anything else you need to
successfully run onnode commands to node 2 (aside from manually running
clusternode_setstate).

Brian
From: John B. <joh...@hp...> - 2004-12-08 23:09:30

Bharata B Rao wrote:
> On Wed, 2004-12-08 at 01:25, John Byrne wrote:
>
>> Bharata B Rao wrote:
>>
>>> On Sat, 2004-12-04 at 01:10, John Byrne wrote:
>>>
>>>> Bharata B Rao wrote:
>>>>
>>>>> devfsd is running only on node 1. On node 2 only init has run and no
>>>>> services have started. (running unmodified init still)
>>>>
>>>> devfsd has to run on every node. Try this and see if it fixes the problem.
>>>
>>> First, since node 2 is running an unmodified init and the initscripts
>>> have not been modified yet, no services get started on the 2nd node.
>>>
>>> Hence, when I tried starting devfsd from linuxrc (of the initrd), I ran
>>> into other problems:
>>>
>>> - When the kernel boots on the 2nd node, it doesn't mount devfs by
>>> default. It needs a command line argument (devfs=mount), which I am now
>>> supplying through the /tftpboot/pxelinux.cfg/default file. (But for the
>>> kernel on the initnode, devfs gets mounted even without the devfs=mount
>>> option. Why?)
>>>
>>> - With this extra option, the kernel on the 2nd node started creating a
>>> .devfsd entry in its /dev:
>>>
>>> # ls -l /dev/.devfsd
>>> crw------- 1 root root 8,0 Dec 8 04:04 /dev/.devfsd
>>>
>>> According to the devfs FAQ, the presence of .devfsd indicates that
>>> devfs has been mounted, so I conclude that devfs is getting mounted on
>>> the 2nd node.
>>>
>>> - But when trying to start the devfsd daemon, this is the message I
>>> observe:
>>>
>>> # /sbin/devfsd /dev
>>> modprobe: modprobe: Can't locate module char-major-8
>>> Error opening file: ".devfsd" No such device
>>>
>>> Not sure why there is a search for char-major-8 here, given that devfs
>>> is in the kernel. And not sure why .devfsd can't be found.
>>>
>>> Any hints on what might be wrong here?
>>
>> The fact that you need the devfs=mount option suggests that you don't
>> have your initrd right. Devfs is supposed to get automatically mounted
>> in the kernel by ssi_mount_devfs(), called from initproc_postroot_init(),
>> which will be called by executing "cluster_config --initproc" from the
>> initrd.
>
> Ok, that explains, I believe, why I wasn't able to start devfsd. Now it
> is clear that devfs gets mounted only during the initproc option of
> cluster_config; I was always trying to start devfs before exec'ing
> cluster_config --initproc.
>
> With the linuxrc version attached below (linuxrc1), things won't proceed
> much, because we didn't have an ssi-modified sysvinit till now. I will
> report progress when I try again with the ssi-modified sysvinit.
>
> I tried to get things moving further with unmodified sysvinit by making
> some changes to linuxrc (attached below as linuxrc2). Here everything is
> done manually, including cluster_config --initproc, starting of devfs
> and mounting of /proc.
>
> However, until devpts is mounted from node 2, onnode won't work. But
> manual mounting of devpts from linuxrc panics the system somewhere in
> the mount code. Need to investigate this further.
>
> Regards,
> Bharata.

Your linuxrc2 is a problem. The "cluster_config --initproc" must be
exec'ed, because it is supposed to turn pid 2 into the reaping process on
each node. Not doing this may make things strange.

Try using "clusternode_setstate UP" to mark the node fully up and then
start a shell with onnode.

John
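John's point about exec'ing "cluster_config --initproc" can be illustrated with a small, self-contained sketch. The cluster_config invocation in the comment is from his message; the echo demonstration below it is our own stand-in, runnable on any machine:

```shell
#!/bin/sh
# In the real linuxrc, the script would end with:
#
#   exec /sbin/cluster_config --initproc
#
# exec *replaces* the current process instead of forking a child, so the
# process that was running linuxrc (pid 2 in this context) itself becomes
# cluster_config, which can then serve as the per-node reaping process.
#
# The stand-in below shows the key property: nothing after exec ever runs,
# because the shell has been replaced by the exec'd program.
sh -c 'exec echo "replaced the shell"; echo "never reached"'
```

Running this prints only "replaced the shell"; the second echo is dead code, which is exactly why a plain (non-exec) invocation of cluster_config would leave the wrong process alive afterward.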