Related
I have the following 3 line example to show the error:
use strict;
use warnings;
use Wx;
It works fine when I run it with perl -w hue5.pl. However, when I add the debugging option and start it with perl -w -d hue5.pl, a problem emerges and the perl runtime crashes saying:
Loading DB routines from perl5db.pl version 1.55
Editor support available.
Enter h or 'h h' for help, or 'man perldebug' for more help.
Wx::CODE(0x5604c197ca80)(/usr/lib64/perl5/vendor_perl/Wx.pm:154):
154: UnsetConstants() if defined &UnsetConstants;
double free or corruption (!prev)
Aborted (core dumped)
I use perl v5.30.3 on 64 bit Linux (up-to-date Fedora release 31) and Wx installed from the binary package perl-Wx-0.9932-15.fc31.x86_64.rpm.
Is anybody else having this issue?
Short version: Uninstalling the package perl-Term-ReadLine-Gnu solved the issue.
Longer version: I saw that the issue does not always happen. In fact, it seems to depend on the size of the process environment. At one point during debugging I could make the crash appear by declaring the following in bash (note: without export the issue does not show)
export SOMEVAR=xyz
and disappear by
unset SOMEVAR
Finally, during further experimentation, I once got a more elaborate failure message pointing right to Readline/Gnu:
Signal SEGV at /usr/lib64/perl5/vendor_perl/Term/ReadLine/Gnu.pm line 504.
Term::ReadLine::Gnu::ornaments(Term::ReadLine=HASH(0x5593c12e5190), 1) > called at /usr/lib64/perl5/vendor_perl/Term/ReadLine/Gnu.pm line 301
Term::ReadLine::Gnu::new("Term::ReadLine", "perldb", GLOB(0x5593c062df58), GLOB(0x5593c05c6ea8)) called at /usr/share/perl5/perl5db.pl line 6865
DB::setterm() called at /usr/share/perl5/perl5db.pl line 1849
DB::_DB__read_next_cmd(undef) called at /usr/share/perl5/perl5db.pl line 2789
DB::DB called at f.pl line 5
For the curious, I note that line 504 in /usr/lib64/perl5/vendor_perl/Term/ReadLine/Gnu.pm was:
return Term::ReadLine::Gnu::XS::ornaments(@_);
For the even more curious I add strace dumps generated by runs which differ only with regard to the absence/presence of SOMEVAR in the environment (note that the name does not seem to matter; I was first pointed towards environment differences by observing different behaviour depending on OLDPWD).
First without the issue (i.e. without variable SOMEVAR), quoting the lines as of loading Readline/Gnu library:
stat("/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/autosplit.ix", {st_mode=S_IFREG|0644, st_size=1137, ...}) = 0
openat(AT_FDCWD, "/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/autosplit.ix", O_RDONLY|O_CLOEXEC) = 13
ioctl(13, TCGETS, 0x7fff0a1cac70) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(13, 0, SEEK_CUR) = 0
read(13, "# Index created by AutoSplit for"..., 8192) = 1137
read(13, "", 8192) = 0
close(13) = 0
lseek(3, 2189, SEEK_SET) = 2189
lseek(3, 0, SEEK_CUR) = 2189
close(3) = 0
getuid() = 1000
geteuid() = 1000
getgid() = 1000
getegid() = 1000
fcntl(4, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
stat("/home/bernhard/.terminfo", 0x55ec16193000) = -1 ENOENT (No such file or directory)
stat("/etc/terminfo", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
stat("/usr/share/terminfo", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
access("/etc/terminfo/x/xterm-256color", R_OK) = -1 ENOENT (No such file or directory)
access("/usr/share/terminfo/x/xterm-256color", R_OK) = 0
openat(AT_FDCWD, "/usr/share/terminfo/x/xterm-256color", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3808, ...}) = 0
read(3, "\36\2%\0&\0\17\0\235\0010\6xterm-256color|xterm"..., 32768) = 3808
read(3, "", 28672) = 0
close(3) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TIOCSWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TCGETS, {B38400 opost isig icanon echo ...}) = 0
openat(AT_FDCWD, "/home/bernhard/.inputrc", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/inputrc", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=943, ...}) = 0
read(3, "# do not bell on tab-completion\n"..., 943) = 943
close(3) = 0
brk(NULL) = 0x55ec1640f000
brk(0x55ec16430000) = 0x55ec16430000
stat("/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/ornaments.al", {st_mode=S_IFREG|0644, st_size=1351, ...}) = 0
geteuid() = 1000
geteuid() = 1000
getegid() = 1000
getgroups(0, NULL) = 2
getgroups(2, [18, 1000]) = 2
stat("/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/ornaments.al", {st_mode=S_IFREG|0644, st_size=1351, ...}) = 0
openat(AT_FDCWD, "/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/ornaments.al", O_RDONLY|O_CLOEXEC) = 3
ioctl(3, TCGETS, 0x7fff0a1cb9d0) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(3, 0, SEEK_CUR) = 0
read(3, "# NOTE: Derived from blib/lib/Te"..., 8192) = 1351
read(3, "", 8192) = 0
close(3) = 0
getpid() = 4060
getpid() = 4060
ioctl(4, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TIOCSWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(4, SNDCTL_TMR_STOP or TCSETSW, {B38400 opost isig -icanon -echo ...}) = 0
rt_sigprocmask(SIG_BLOCK, [HUP INT QUIT ALRM TERM TSTP TTIN TTOU], [], 8) = 0
rt_sigaction(SIGINT, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGHUP, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGQUIT, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGALRM, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTSTP, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTTOU, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigaction(SIGTTIN, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigaction(SIGWINCH, {sa_handler=0x7f4433da22f0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f44490e46b0}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
fstat(5, {st_mode=S_IFCHR|0666, st_rdev=makedev(0x5, 0), ...}) = 0
ioctl(5, TCGETS, {B38400 opost isig -icanon -echo ...}) = 0
write(5, "\33[4m DB<1> \33[24m", 17) = 17
pselect6(5, [4], NULL, NULL, NULL, {[], 8}) = 1 (in [4])
read(4, "q", 1) = 1
select(5, [4], NULL, [4], {tv_sec=0, tv_usec=0}) = 0 (Timeout)
write(5, "q", 1) = 1
pselect6(5, [4], NULL, NULL, NULL, {[], 8}) = 1 (in [4])
read(4, "\r", 1) = 1
write(5, "\n", 1) = 1
ioctl(4, SNDCTL_TMR_STOP or TCSETSW, {B38400 opost isig icanon echo ...}) = 0
rt_sigaction(SIGINT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGHUP, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGQUIT, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGALRM, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGTSTP, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGTTOU, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGTTIN, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da2e70, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, 8) = 0
rt_sigaction(SIGWINCH, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f44490e46b0}, {sa_handler=0x7f4433da22f0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f44490e46b0}, 8) = 0
close(4) = 0
close(5) = 0
exit_group(0) = ?
+++ exited with 0 +++
Second with the issue (i.e. with exported variable SOMEVAR), again quoting only the lines as of loading Readline/Gnu library:
stat("/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/autosplit.ix", {st_mode=S_IFREG|0644, st_size=1137, ...}) = 0
openat(AT_FDCWD, "/usr/lib64/perl5/vendor_perl/auto/Term/ReadLine/Gnu/XS/autosplit.ix", O_RDONLY|O_CLOEXEC) = 13
ioctl(13, TCGETS, 0x7ffe8bf62e00) = -1 ENOTTY (Inappropriate ioctl for device)
lseek(13, 0, SEEK_CUR) = 0
read(13, "# Index created by AutoSplit for"..., 8192) = 1137
read(13, "", 8192) = 0
close(13) = 0
lseek(3, 2189, SEEK_SET) = 2189
lseek(3, 0, SEEK_CUR) = 2189
close(3) = 0
getuid() = 1000
geteuid() = 1000
getgid() = 1000
getegid() = 1000
fcntl(4, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fcntl(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
stat("/home/bernhard/.terminfo", 0x55b600a2b5a0) = -1 ENOENT (No such file or directory)
stat("/etc/terminfo", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
stat("/usr/share/terminfo", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
access("/etc/terminfo/x/xterm-256color", R_OK) = -1 ENOENT (No such file or directory)
access("/usr/share/terminfo/x/xterm-256color", R_OK) = 0
openat(AT_FDCWD, "/usr/share/terminfo/x/xterm-256color", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3808, ...}) = 0
read(3, "\36\2%\0&\0\17\0\235\0010\6xterm-256color|xterm"..., 32768) = 3808
read(3, "", 28672) = 0
close(3) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
ioctl(4, TIOCGWINSZ, {ws_row=43, ws_col=173, ws_xpixel=0, ws_ypixel=0}) = 0
writev(2, [{iov_base="double free or corruption (!prev"..., iov_len=33}, {iov_base="\n", iov_len=1}], 2) = 34
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f376a2e3000
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, ~[RTMIN RT_1], [], 8) = 0
getpid() = 4029
gettid() = 4029
tgkill(4029, 4029, SIGABRT) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
--- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=4029, si_uid=1000} ---
+++ killed by SIGABRT (core dumped) +++
I'm trying to set up a lustre client (docs) inside a docker container running on BlueData.
As per this post, I've modified the BlueData config on each worker and the controller node:
$ vi /opt/bluedata/common-install/bd_mgmt/releases/1/sys.config
I added the SYS_ADMIN capability:
{allowed_docker_caps, ["SETPCAP",
"SYS_ADMIN",
...
And rebooted the host.
Next, I provisioned a Centos 7.x cluster in BlueData:
CentOS 7.x with no pre-packaged apps or software
Image Version: 2.2
Distro ID: bluedata/centos7
Then I ssh'd into the Centos container:
$ ssh -o StrictHostKeyChecking=no -i /Users/me/.ssh/id_rsa centos@x.x.x.x
Inside the container, I install the lustre client:
sudo yum install \
kernel \
kernel-devel \
kernel-headers \
kernel-abi-whitelists \
kernel-tools \
kernel-tools-libs \
kernel-tools-libs-devel
cat >/tmp/lustre-repo.conf <<\__EOF
[lustre-server]
name=lustre-server
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/server
gpgcheck=0
[lustre-client]
name=lustre-client
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/client
gpgcheck=0
[e2fsprogs-wc]
name=e2fsprogs-wc
baseurl=https://downloads.whamcloud.com/public/e2fsprogs/latest/el7
gpgcheck=0
__EOF
sudo mv /tmp/lustre-repo.conf /etc/yum.repos.d/lustre.repo
sudo reboot
sudo yum install epel-release
sudo yum --nogpgcheck --enablerepo=lustre-client install lustre-client-dkms lustre-client
sudo reboot
However, I receive an error when I try to load the lustre module:
$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
I have checked the kernel version:
[bluedata@bluedata-2 ~]$ uname -a
Linux bluedata-2.bdlocal 3.10.0-957.21.3.el7.x86_64 #1 SMP Tue Jun 18 16:35:19 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux
The lustre version I installed is 2.12:
kmod-lustre-client.x86_64 2.12.2-1.el7 @lustre-client
lustre-client.x86_64 2.12.2-1.el7 @lustre-client
Update 1
No errors are shown with dmesg:
[bluedata@bluedata-3 ~]$ dmesg -c
[bluedata@bluedata-3 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
[bluedata@bluedata-3 ~]$ dmesg
Update 2
$ sudo strace modprobe lustre
Outputs:
execve("/sbin/modprobe", ["modprobe", "lustre"], [/* 16 vars */]) = 0
brk(NULL) = 0x1648000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff2000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=22387, ...}) = 0
mmap(NULL, 22387, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fec000
close(3) = 0
open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2000\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=157424, ...}) = 0
mmap(NULL, 2249352, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458bac000
mprotect(0x7f4458bd1000, 2093056, PROT_NONE) = 0
mmap(0x7f4458dd0000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x24000) = 0x7f4458dd0000
close(3) = 0
open("/lib64/libz.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20!\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=90248, ...}) = 0
mmap(NULL, 2183272, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458996000
mprotect(0x7f44589ab000, 2093056, PROT_NONE) = 0
mmap(0x7f4458baa000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458baa000
close(3) = 0
open("/lib64/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220*\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=88776, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458feb000
mmap(NULL, 2184192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458780000
mprotect(0x7f4458795000, 2093056, PROT_NONE) = 0
mmap(0x7f4458994000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458994000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2151672, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f44583b3000
mprotect(0x7f4458575000, 2097152, PROT_NONE) = 0
mmap(0x7f4458775000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f4458775000
mmap(0x7f445877b000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f445877b000
close(3) = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260l\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=141968, ...}) = 0
mmap(NULL, 2208904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458197000
mprotect(0x7f44581ae000, 2093056, PROT_NONE) = 0
mmap(0x7f44583ad000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f44583ad000
mmap(0x7f44583af000, 13448, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f44583af000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fea000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fe8000
arch_prctl(ARCH_SET_FS, 0x7f4458fe8740) = 0
mprotect(0x7f4458775000, 16384, PROT_READ) = 0
mprotect(0x7f44583ad000, 4096, PROT_READ) = 0
mprotect(0x7f4458994000, 4096, PROT_READ) = 0
mprotect(0x7f4458baa000, 4096, PROT_READ) = 0
mprotect(0x7f4458dd0000, 4096, PROT_READ) = 0
mprotect(0x621000, 4096, PROT_READ) = 0
mprotect(0x7f4458ff3000, 4096, PROT_READ) = 0
munmap(0x7f4458fec000, 22387) = 0
set_tid_address(0x7f4458fe8a10) = 1264
set_robust_list(0x7f4458fe8a20, 24) = 0
rt_sigaction(SIGRTMIN, {0x7f445819d790, [], SA_RESTORER|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7f445819d820, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
brk(NULL) = 0x1648000
brk(0x1669000) = 0x1669000
brk(NULL) = 0x1669000
uname({sysname="Linux", nodename="bluedata-3.bdlocal", ...}) = 0
stat("/etc/modprobe.d", {st_mode=S_IFDIR|0755, st_size=54, ...}) = 0
openat(AT_FDCWD, "/etc/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 4 entries */, 32768) = 128
newfstatat(3, "dccp-blacklist.conf", {st_mode=S_IFREG|0644, st_size=215, ...}, 0) = 0
newfstatat(3, "ko2iblnd.conf", {st_mode=S_IFREG|0644, st_size=999, ...}, 0) = 0
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
stat("/run/modprobe.d", 0x7ffcc1e0a640) = -1 ENOENT (No such file or directory)
stat("/lib/modprobe.d", {st_mode=S_IFDIR|0755, st_size=6, ...}) = 0
openat(AT_FDCWD, "/lib/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 2 entries */, 32768) = 48
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
open("/etc/modprobe.d/dccp-blacklist.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=215, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# DCCP is considered a potential"..., 4096) = 215
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/etc/modprobe.d/ko2iblnd.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=999, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Currently it isn't possible to"..., 4096) = 999
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.softdep", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=518, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Soft dependencies extracted fr"..., 4096) = 518
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
read(3, "BOOT_IMAGE=/boot/vmlinuz-3.10.0-"..., 4095) = 193
read(3, "", 3902) = 0
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.dep.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=382199, ...}) = 0
mmap(NULL, 382199, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458f8a000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.alias.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=802187, ...}) = 0
mmap(NULL, 802187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458ec6000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.symbols.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=537967, ...}) = 0
mmap(NULL, 537967, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458e42000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.builtin.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=9332, ...}) = 0
mmap(NULL, 9332, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fef000
close(3) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz", {st_mode=S_IFREG|0644, st_size=58688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz", {st_mode=S_IFREG|0644, st_size=81772, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz", {st_mode=S_IFREG|0644, st_size=11592, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz", {st_mode=S_IFREG|0644, st_size=133688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz", {st_mode=S_IFREG|0644, st_size=101472, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz", {st_mode=S_IFREG|0644, st_size=14600, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz", {st_mode=S_IFREG|0644, st_size=369448, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/obdclass.ko.xz", {st_mode=S_IFREG|0644, st_size=270652, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lnet.ko.xz", {st_mode=S_IFREG|0644, st_size=174800, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/libcfs.ko.xz", {st_mode=S_IFREG|0644, st_size=88252, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", {st_mode=S_IFREG|0644, st_size=2028, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/lustre/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/lustre", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_pclmul/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_generic/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/crct10dif_generic", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz", O_RDONLY|O_CLOEXEC) = 3
read(3, "\3757zXZ\0", 6) = 6
lseek(3, 0, SEEK_SET) = 0
read(3, "\3757zXZ\0\0\4\346\326\264F\2\0!\1\26\0\0\0t/\345\243\340\30l\6\267]\0?"..., 8192) = 1784
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4457996000
read(3, "", 8192) = 0
munmap(0x7f4457996000, 8392704) = 0
init_module(0x1653f40, 6253, "") = -1 ENOSYS (Function not implemented)
open("/sys/module/crc_t10dif/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
stat("/sys/module/crc_t10dif", 0x7ffcc1e0a5c0) = -1 ENOSYS (Function not implemented)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e0b5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1e7d000) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f4458f8a000, 382199) = 0
munmap(0x7f4458ec6000, 802187) = 0
munmap(0x7f4458e42000, 537967) = 0
munmap(0x7f4458fef000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 3
I tried installing the kmod package instead of dkms:
Running transaction
Installing : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/null': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/kmsg': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/console': Operation not permitted
Verifying : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
Installed:
kmod-lustre-client.x86_64 0:2.12.2-1.el7
Complete!
I then tried again sudo strace modprobe lustre:
...
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450be5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1410000) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f04da388000, 383873) = 0
munmap(0x7f04da2c4000, 802187) = 0
munmap(0x7f04da240000, 537967) = 0
munmap(0x7f04da3ed000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 4
Running the container as --privileged has resolved the original error, but I now hit a new error:
[bluedata@bluedata-5 ~]$ sudo dmesg -c
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
modprobe: ERROR: could not insert 'lustre': Invalid argument
[bluedata@bluedata-5 ~]$ dmesg
[ 2072.258326] LNetError: 56638:0:(api-ni.c:2233:lnet_startup_lndnet()) Can't load LND tcp, module ksocklnd, rc=256
[ 2072.264113] LustreError: 56638:0:(events.c:625:ptlrpc_init_portals()) network initialisation failed
Update 5
The error message suggested I needed to configure the network, so I tried:
[bluedata@bluedata-5 ~]$ sudo modprobe lnet
[bluedata@bluedata-5 ~]$ sudo lnetctl lnet configure
lustre now loads without error:
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
I followed the original steps in the question and ran the container with --privileged. Then loading and configuring lnet allowed loading the lustre module without error:
[bluedata@bluedata-5 ~]$ sudo modprobe lnet
[bluedata@bluedata-5 ~]$ sudo lnetctl lnet configure
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
[bluedata@bluedata-5 ~]$
IMPORTANT NOTE: Running with the privileged flag is not recommended. There are other options - reach out to your local BlueData team to learn more.
A strange process is running and eating up all resources.
I've killed it many times, but it keeps coming back and starting again.
I'd highly appreciate your help!
Here is the pstree output for reference:
$ pstree -s
init─┬─-bash
├─-bash───1023*[{-bash}]
├─agetty
├─atd
├─auditd───{auditd}
├─crond
├─dbus-daemon
├─dhclient
├─irqbalance
├─java───311*[{java}]
├─java───81*[{java}]
├─java───55*[{java}]
├─6*[mingetty]
├─ntpd
├─rngd
├─rpc.statd
├─rpcbind
├─rsyslogd───3*[{rsyslogd}]
├─2*[sendmail]
├─sensu-client───{sensu-client}
├─sshd─┬─sshd───sshd───bash───sudo───su───bash
│ ├─2*[sshd───sshd───bash]
│ ├─sshd───sshd───bash───sudo───su───bash───tail
│ └─sshd───sshd───bash───pstree
├─udevd───2*[udevd]
└─yMPzpi───9*[{yMPzpi}]
yMPzpi: This is the one which keeps starting again under a different name even after being killed.
Here is the strace output for reference:
$ strace -p 294891
Process 294891 attached
restart_syscall(<... resuming interrupted call ...>) = 1
poll([{fd=4, events=POLLIN}], 1, 300000) = 1 ([{fd=4, revents=POLLIN}])
clock_gettime(CLOCK_REALTIME, {1540834737, 507976501}) = 0
poll([{fd=4, events=POLLIN}], 1, 60000) = 1 ([{fd=4, revents=POLLIN}])
recvfrom(4, "{\"id\":1,\"jsonrpc\":\"2.0\",\"error\":"..., 2044, 0, NULL, NULL) = 63
clock_gettime(CLOCK_REALTIME, {1540834737, 508379955}) = 0
poll([{fd=4, events=POLLIN}], 1, 90000) = 1 ([{fd=4, revents=POLLIN}])
poll([{fd=4, events=POLLIN}], 1, 300000) = 1 ([{fd=4, revents=POLLIN}])
clock_gettime(CLOCK_REALTIME, {1540834778, 177325012}) = 0
poll([{fd=4, events=POLLIN}], 1, 60000) = 1 ([{fd=4, revents=POLLIN}])
recvfrom(4, "{\"jsonrpc\":\"2.0\",\"method\":\"job\","..., 2044, 0, NULL, NULL) = 253
clock_gettime(CLOCK_REALTIME, {1540834778, 177412756}) = 0
clock_gettime(CLOCK_REALTIME, {1540834778, 177475706}) = 0
poll([{fd=4, events=POLLIN}], 1, 90000) = 1 ([{fd=4, revents=POLLIN}])
poll([{fd=4, events=POLLIN}], 1, 300000) = 1 ([{fd=4, revents=POLLIN}])
clock_gettime(CLOCK_REALTIME, {1540834846, 399415744}) = 0
poll([{fd=4, events=POLLIN}], 1, 60000) = 1 ([{fd=4, revents=POLLIN}])
recvfrom(4, "{\"jsonrpc\":\"2.0\",\"method\":\"job\","..., 2044, 0, NULL, NULL) = 253
clock_gettime(CLOCK_REALTIME, {1540834846, 399486835}) = 0
clock_gettime(CLOCK_REALTIME, {1540834846, 399551309}) = 0
poll([{fd=4, events=POLLIN}], 1, 90000
^CProcess 294891 detached
<detached ...>
I'm not very knowledgeable in named and bind, but after setting up my domain and playing with named (trying to set up my private email server) I probably ended up messing something up. Traceroute does not work anymore; I get a bind error (ping works). The relevant part of "strace traceroute xxx.xxx.xxx.xxx" is below. Can anybody please guide me to a solution - thx jankom
socket(PF_INET, SOCK_RAW, IPPROTO_ICMP) = 10
socket(PF_INET, SOCK_RAW, IPPROTO_RAW) = 11
getuid32() = 0
setuid32(0) = 0
getpid() = 3212
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 12
bind(12, {sa_family=AF_UNSPEC, sa_data="\0\0\0\0\0\0\0\0\0\0\0\0\0\0"}, 16) = -1 EINVAL (Invalid argument)
dup(2) = 13
fcntl64(13, F_GETFL) = 0x2 (flags O_RDWR)
fstat64(13, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 4), ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb77c3000
_llseek(13, 0, 0xbfc4ebc0, SEEK_CUR) = -1 ESPIPE (Illegal seek)
write(13, "bind: Invalid argument\n", 23bind: Invalid argument) = 23
) 23
close(13) = 0
munmap(0xb77c3000, 4096) = 0
exit_group(1) = ?
Environment
Oracle Solaris 11 for SPARC
Running in a Non-primary (Guest) Logical Domain (LDOM).
Logged in with root access.
Problem
My application uses libpcap to capture network traffic. When my application (myTestApp) calls libpcap findalldevs, my application only sees one network interface ("lo0"), yet ifconfig -a shows many more interfaces.
My application is statically linked to libpcap (version 1.3). The build machine is SunOS RS-T5120-01 5.10 Generic_141444-09 sun4v sparc SUNW,SPARC-Enterprise-T5120.
Any ideas why my application can't see all the network interfaces?
Linux command Line Sample Output
# tcpdump --version
tcpdump version 4.1.1
libpcap version 1.1.1
# uname -a
SunOS g99dnpi802-LD 5.11 11.1 sun4v sparc sun4v
# ./myTestApp -adapters
[Available Adapters]
name: "lo0", description: "", address: 127.0.0.1, mask: 255.0.0.0
# ifconfig -a
lo0: flags=2001000849<UP,LOOPBACK,RUNNING,MULTICAST,IPv4,VIRTUAL> mtu 8232 index 1
inet 127.0.0.1 netmask ff000000
net0: flags=100001000843<UP,BROADCAST,RUNNING,MULTICAST,IPv4,PHYSRUNNING> mtu 1500 index 2
inet 10.99.220.15 netmask ffffff00 broadcast 10.99.220.255
ether 0:14:4f:fa:e0:8d
net1: flags=100001000843<UP,BROADCAST,RUNNING,MULTICAST,IPv4,PHYSRUNNING> mtu 1500 index 3
inet 10.99.193.210 netmask ffffff80 broadcast 10.99.193.255
ether 0:14:4f:f9:d0:9c
lo0: flags=2002000849<UP,LOOPBACK,RUNNING,MULTICAST,IPv6,VIRTUAL> mtu 8252 index 1
inet6 ::1/128
net0: flags=120002000840<RUNNING,MULTICAST,IPv6,PHYSRUNNING> mtu 1500 index 2
inet6 ::/0
ether 0:14:4f:fa:e0:8d
net1: flags=120002000840<RUNNING,MULTICAST,IPv6,PHYSRUNNING> mtu 1500 index 3
inet6 ::/0
ether 0:14:4f:f9:d0:9c
# tcpdump -i net1
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on net1, link-type EN10MB (Ethernet), capture size 65535 bytes
09:32:29.520815 IP g99dnpi802-LD.ssh > 10.99.8.102.65436: Flags [P.], seq 3397909586:3397909718, ack 1479093081, win 64240, length 132
09:32:29.520860 IP g99dnpi802-LD.ssh > 10.99.8.102.65436: Flags [P.], seq 132:232, ack 1, win 64240, length 100
09:32:29.521644 IP 10.99.8.102.65436 > g99dnpi802-LD.ssh: Flags [.], ack 132, win 16379, length 0
09:32:29.680844 00:14:4f:f9:8d:84 (oui Unknown) > Broadcast, ethertype Unknown (0xcafe), length 90:
0x0000: 0500 ad85 0939 ffff 0001 ffff 809c 7401 .....9........t.
0x0010: 0000 004c 0000 0000 8070 00ab 0000 0000 ...L.....p......
0x0020: 0000 0000 0000 0000 0043 ffff 2074 6167 .........C...tag
0x0030: 6d61 7374 0672 0014 4ff9 8d84 5f31 3362 mast.r..O..._13b
0x0040: 650a 0000 0000 0000 84f9 0aab e...........
[update]
Here is the (edited) output of running the following truss command on the build machine and the customer machine.
truss -f -a -vall -l -d -o truss.txt ./myTestApp -adapters
truss on build machine
14365/1: 0.0751 so_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP, "", SOV_DEFAULT) = 3
14365/1: 0.0753 so_socket(PF_INET6, SOCK_DGRAM, IPPROTO_IP, "", SOV_DEFAULT) = 4
14365/1: 0.0755 ioctl(3, SIOCGLIFNUM, 0xFFBE9F50) = 0
14365/1: 0.0757 ioctl(3, SIOCGLIFCONF, 0xFFBE9F40) = 0
14365/1: 0.0804 ioctl(3, SIOCGLIFFLAGS, 0xFFBE9DC8) = 0
14365/1: 0.0806 ioctl(3, SIOCGLIFNETMASK, 0xFFBE9C50) = 0
14365/1: 0.0809 open64("/dev/lo", O_RDWR) Err#2 ENOENT
14365/1: 0.0811 open64("/dev/lo0", O_RDWR) Err#2 ENOENT
14365/1: 0.0813 ioctl(3, SIOCGLIFFLAGS, 0xFFBE9DC8) = 0
14365/1: 0.0815 ioctl(3, SIOCGLIFNETMASK, 0xFFBE9C50) = 0
14365/1: 0.0817 ioctl(3, SIOCGLIFBRDADDR, 0xFFBE9AD8) = 0
14365/1: 0.0819 open64("/dev/e1000g", O_RDWR) = 5
truss on customer machine
6346/1: 0.0315 so_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP, 0, SOV_DEFAULT) = 3
6346/1: 0.0319 so_socket(PF_INET6, SOCK_DGRAM, IPPROTO_IP, 0, SOV_DEFAULT) = 5
6346/1: 0.0320 ioctl(3, SIOCGLIFNUM, 0xFFBEA830) = 0
6346/1: 0.0321 ioctl(3, SIOCGLIFCONF, 0xFFBEA820) = 0
6346/1: 0.0322 ioctl(3, SIOCGLIFFLAGS, 0xFFBEA6A8) = 0
6346/1: 0.0323 ioctl(3, SIOCGLIFNETMASK, 0xFFBEA530) = 0
6346/1: 0.0327 open64("/dev/lo", O_RDWR) Err#2 ENOENT
6346/1: 0.0328 open64("/dev/lo0", O_RDWR) = 6
6346/1: 0.0345 ioctl(3, SIOCGLIFFLAGS, 0xFFBEA6A8) = 0
6346/1: 0.0346 ioctl(3, SIOCGLIFNETMASK, 0xFFBEA530) = 0
6346/1: 0.0347 ioctl(3, SIOCGLIFBRDADDR, 0xFFBEA3B8) = 0
6346/1: 0.0347 open64("/dev/net", O_RDWR) Err#21 EISDIR
6346/1: 0.0349 ioctl(3, SIOCGLIFFLAGS, 0xFFBEA6A8) = 0
6346/1: 0.0349 ioctl(3, SIOCGLIFNETMASK, 0xFFBEA530) = 0
6346/1: 0.0350 ioctl(3, SIOCGLIFBRDADDR, 0xFFBEA3B8) = 0
6346/1: 0.0351 open64("/dev/net", O_RDWR) Err#21 EISDIR