I'm trying to set up a lustre client (docs) inside a docker container running on BlueData.
As per this post, I've modified the BlueData config on each worker and the controller node:
$ vi /opt/bluedata/common-install/bd_mgmt/releases/1/sys.config
I added the SYS_ADMIN capability:
{allowed_docker_caps, ["SETPCAP",
"SYS_ADMIN",
...
And rebooted the host.
Next, I provisioned a Centos 7.x cluster in BlueData:
CentOS 7.x with no pre-packaged apps or software
Image Version: 2.2
Distro ID: bluedata/centos7
Then I ssh'd into the Centos container:
$ ssh -o StrictHostKeyChecking=no -i /Users/me/.ssh/id_rsa centos#x.x.x.x
Inside the container, I install the lustre client:
sudo yum install \
kernel \
kernel-devel \
kernel-headers \
kernel-abi-whitelists \
kernel-tools \
kernel-tools-libs \
kernel-tools-libs-devel
cat >/tmp/lustre-repo.conf <<\__EOF
[lustre-server]
name=lustre-server
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/server
gpgcheck=0
[lustre-client]
name=lustre-client
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/client
gpgcheck=0
[e2fsprogs-wc]
name=e2fsprogs-wc
baseurl=https://downloads.whamcloud.com/public/e2fsprogs/latest/el7
gpgcheck=0
__EOF
sudo mv /tmp/lustre-repo.conf /etc/yum.repos.d/lustre.repo
sudo reboot
sudo yum install epel-release
sudo yum --nogpgcheck --enablerepo=lustre-client install lustre-client-dkms lustre-client
sudo reboot
However, I receive an error when I try to load the lustre module:
$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
I have checked the kernel version:
[bluedata#bluedata-2 ~]$ uname -a
Linux bluedata-2.bdlocal 3.10.0-957.21.3.el7.x86_64 #1 SMP Tue Jun 18 16:35:19 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux
The lustre version I installed is 2.12:
kmod-lustre-client.x86_64 2.12.2-1.el7 #lustre-client
lustre-client.x86_64 2.12.2-1.el7 #lustre-client
Update 1
No errors are shown with dmesg:
[bluedata#bluedata-3 ~]$ dmesg -c
[bluedata#bluedata-3 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
[bluedata#bluedata-3 ~]$ dmesg
Update 2
$ sudo strace modprobe lustre
Outputs:
execve("/sbin/modprobe", ["modprobe", "lustre"], [/* 16 vars */]) = 0
brk(NULL) = 0x1648000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff2000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=22387, ...}) = 0
mmap(NULL, 22387, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fec000
close(3) = 0
open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2000\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=157424, ...}) = 0
mmap(NULL, 2249352, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458bac000
mprotect(0x7f4458bd1000, 2093056, PROT_NONE) = 0
mmap(0x7f4458dd0000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x24000) = 0x7f4458dd0000
close(3) = 0
open("/lib64/libz.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20!\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=90248, ...}) = 0
mmap(NULL, 2183272, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458996000
mprotect(0x7f44589ab000, 2093056, PROT_NONE) = 0
mmap(0x7f4458baa000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458baa000
close(3) = 0
open("/lib64/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220*\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=88776, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458feb000
mmap(NULL, 2184192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458780000
mprotect(0x7f4458795000, 2093056, PROT_NONE) = 0
mmap(0x7f4458994000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458994000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2151672, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f44583b3000
mprotect(0x7f4458575000, 2097152, PROT_NONE) = 0
mmap(0x7f4458775000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f4458775000
mmap(0x7f445877b000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f445877b000
close(3) = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260l\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=141968, ...}) = 0
mmap(NULL, 2208904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458197000
mprotect(0x7f44581ae000, 2093056, PROT_NONE) = 0
mmap(0x7f44583ad000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f44583ad000
mmap(0x7f44583af000, 13448, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f44583af000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fea000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fe8000
arch_prctl(ARCH_SET_FS, 0x7f4458fe8740) = 0
mprotect(0x7f4458775000, 16384, PROT_READ) = 0
mprotect(0x7f44583ad000, 4096, PROT_READ) = 0
mprotect(0x7f4458994000, 4096, PROT_READ) = 0
mprotect(0x7f4458baa000, 4096, PROT_READ) = 0
mprotect(0x7f4458dd0000, 4096, PROT_READ) = 0
mprotect(0x621000, 4096, PROT_READ) = 0
mprotect(0x7f4458ff3000, 4096, PROT_READ) = 0
munmap(0x7f4458fec000, 22387) = 0
set_tid_address(0x7f4458fe8a10) = 1264
set_robust_list(0x7f4458fe8a20, 24) = 0
rt_sigaction(SIGRTMIN, {0x7f445819d790, [], SA_RESTORER|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7f445819d820, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
brk(NULL) = 0x1648000
brk(0x1669000) = 0x1669000
brk(NULL) = 0x1669000
uname({sysname="Linux", nodename="bluedata-3.bdlocal", ...}) = 0
stat("/etc/modprobe.d", {st_mode=S_IFDIR|0755, st_size=54, ...}) = 0
openat(AT_FDCWD, "/etc/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 4 entries */, 32768) = 128
newfstatat(3, "dccp-blacklist.conf", {st_mode=S_IFREG|0644, st_size=215, ...}, 0) = 0
newfstatat(3, "ko2iblnd.conf", {st_mode=S_IFREG|0644, st_size=999, ...}, 0) = 0
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
stat("/run/modprobe.d", 0x7ffcc1e0a640) = -1 ENOENT (No such file or directory)
stat("/lib/modprobe.d", {st_mode=S_IFDIR|0755, st_size=6, ...}) = 0
openat(AT_FDCWD, "/lib/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 2 entries */, 32768) = 48
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
open("/etc/modprobe.d/dccp-blacklist.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=215, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# DCCP is considered a potential"..., 4096) = 215
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/etc/modprobe.d/ko2iblnd.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=999, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Currently it isn't possible to"..., 4096) = 999
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.softdep", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=518, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Soft dependencies extracted fr"..., 4096) = 518
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
read(3, "BOOT_IMAGE=/boot/vmlinuz-3.10.0-"..., 4095) = 193
read(3, "", 3902) = 0
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.dep.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=382199, ...}) = 0
mmap(NULL, 382199, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458f8a000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.alias.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=802187, ...}) = 0
mmap(NULL, 802187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458ec6000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.symbols.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=537967, ...}) = 0
mmap(NULL, 537967, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458e42000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.builtin.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=9332, ...}) = 0
mmap(NULL, 9332, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fef000
close(3) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz", {st_mode=S_IFREG|0644, st_size=58688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz", {st_mode=S_IFREG|0644, st_size=81772, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz", {st_mode=S_IFREG|0644, st_size=11592, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz", {st_mode=S_IFREG|0644, st_size=133688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz", {st_mode=S_IFREG|0644, st_size=101472, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz", {st_mode=S_IFREG|0644, st_size=14600, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz", {st_mode=S_IFREG|0644, st_size=369448, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/obdclass.ko.xz", {st_mode=S_IFREG|0644, st_size=270652, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lnet.ko.xz", {st_mode=S_IFREG|0644, st_size=174800, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/libcfs.ko.xz", {st_mode=S_IFREG|0644, st_size=88252, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", {st_mode=S_IFREG|0644, st_size=2028, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/lustre/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/lustre", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_pclmul/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_generic/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/crct10dif_generic", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz", O_RDONLY|O_CLOEXEC) = 3
read(3, "\3757zXZ\0", 6) = 6
lseek(3, 0, SEEK_SET) = 0
read(3, "\3757zXZ\0\0\4\346\326\264F\2\0!\1\26\0\0\0t/\345\243\340\30l\6\267]\0?"..., 8192) = 1784
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4457996000
read(3, "", 8192) = 0
munmap(0x7f4457996000, 8392704) = 0
init_module(0x1653f40, 6253, "") = -1 ENOSYS (Function not implemented)
open("/sys/module/crc_t10dif/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
stat("/sys/module/crc_t10dif", 0x7ffcc1e0a5c0) = -1 ENOSYS (Function not implemented)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e0b5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1e7d000) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f4458f8a000, 382199) = 0
munmap(0x7f4458ec6000, 802187) = 0
munmap(0x7f4458e42000, 537967) = 0
munmap(0x7f4458fef000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 3
I tried installing the kmod package instead of dkms:
Running transaction
Installing : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/null': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/kmsg': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/console': Operation not permitted
Verifying : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
Installed:
kmod-lustre-client.x86_64 0:2.12.2-1.el7
Complete!
I then tried again sudo strace modprobe lustre:
...
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450be5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1410000) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f04da388000, 383873) = 0
munmap(0x7f04da2c4000, 802187) = 0
munmap(0x7f04da240000, 537967) = 0
munmap(0x7f04da3ed000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 4
Running the container as --privileged has resolved the original error, but I now hit a new error:
[bluedata#bluedata-5 ~]$ sudo dmesg -c
[bluedata#bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
modprobe: ERROR: could not insert 'lustre': Invalid argument
[bluedata#bluedata-5 ~]$ dmesg
[ 2072.258326] LNetError: 56638:0:(api-ni.c:2233:lnet_startup_lndnet()) Can't load LND tcp, module ksocklnd, rc=256
[ 2072.264113] LustreError: 56638:0:(events.c:625:ptlrpc_init_portals()) network initialisation failed
Update 5
The error message suggested I needed to configure the network, so I tried:
[bluedata#bluedata-5 ~]$ sudo modprobe lnet
[bluedata#bluedata-5 ~]$ sudo lnetctl lnet configure
lustre now loads without error:
[bluedata#bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
I followed the original steps in the question and run the container as --privileged. Then loading and configuring lnet allowed loading the lustre module without error:
[bluedata#bluedata-5 ~]$ sudo modprobe lnet
[bluedata#bluedata-5 ~]$ sudo lnetctl lnet configure
[bluedata#bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
[bluedata#bluedata-5 ~]$
IMPORTANT NOTE: Running with the privileged flag is not recommended. There are other options - reach out to your local BlueData team to learn more.
I have a Postgresql Server with many databases, one of the database got a stucked INSERT INTO and it hangs all the Database. After rebooting the server activates it´s recovery mode, i did a strace and got the following:
openat(AT_FDCWD, "base/14096409", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 755
getdents(755, /* 1024 entries */, 32768) = 32760
getdents(755, /* 1024 entries */, 32768) = 32768
getdents(755, /* 801 entries */, 32768) = 25632
getdents(755, /* 0 entries */, 32768) = 0
close(755) = 0
openat(AT_FDCWD, "base/14096409", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 755
getdents(755, /* 1024 entries */, 32768) = 32760
getdents(755, /* 1024 entries */, 32768) = 32768
getdents(755, /* 801 entries */, 32768) = 25632
getdents(755, /* 0 entries */, 32768) = 0
close(755) = 0
open("base/14096409", O_RDONLY) = 755
fsync(755) = 0
close(755) = 0
getdents(5, /* 0 entries */, 32768) = 0
close(5) = 0
openat(AT_FDCWD, "pg_tblspc", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 5
getdents(5, /* 2 entries */, 32768) = 48
getdents(5, /* 0 entries */, 32768) = 0
close(5) = 0
open("pg_xlog/000000010000004E0000008B", O_RDWR) = 5
openat(AT_FDCWD, "pg_twophase", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 755
getdents(755, /* 2 entries */, 32768) = 48
getdents(755, /* 0 entries */, 32768) = 0
close(755) = 0
open("base/13702059/13709821", O_RDWR) = 755
lseek(755, 16384, SEEK_SET) = 16384
lseek(755, 0, SEEK_END) = 32768
write(328, "M\0\0\0\270\372\206w\1\0\4\0008\0\360\1\0 \4 \0\0\0\0 \235\300\5\30\232\n\6"..., 8192) = 8192
lseek(755, 8192, SEEK_SET) = 8192
read(755, "N\0\0\0\20\31\224f\1\0\0\0\374\1\310\22\360\37\4 \0\0\0\0\310\2220\0\330\2370\0"..., 8192) = 8192
lseek(755, 0, SEEK_END) = 32768
write(328, "M\0\0\0\340\rQx\1\0\4\0008\0P\1\0 \4 \0\0\0\0(\235\256\5H\232\300\5"..., 8192) = 8192
lseek(755, 24576, SEEK_SET) = 24576
read(755, "N\0\0\0\20\31\224f\1\0\0\0X\0028\21\360\37\4 \0\0\0\0008\221#\0X\2210\0"..., 8192) = 8192
write(328, "M\0\0\0 [$y\1\0\4\0008\0X\1\0 \4 \0\0\0\0(\235\254\5H\232\274\5"..., 8192) = 8192
lseek(755, 0, SEEK_SET) = 0
read(755, "I\0\0\0\310\307\10\t\1\0\0\0000\0\360\37\360\37\4 \0\0\0\0b1\5\0\2\0\0\0"..., 8192) = 8192
semop(3637260, [{12, -1, 0}], 1
Any advice what can i do ? Thanks so much !
I try to run this example, which uses ANN Toolbox for Scilab
https://burubaxair.wordpress.com/2014/03/12/artificial-neural-networks-in-scilab/
This is code:
T = [
1 1 1 1 1
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
]';
U = [
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
0 1 1 1 0
]';
N = [35 10 2];
W = ann_FF_init(N);
x = [1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
0, 1, 1, 1, 0]';
t_t = [1 0]';
t_u = [0 1]';
t = [t_t, t_u];
lp = [0.01, 1e-4];
epochs = 3000;
W = ann_FF_Std_batch(x,t,N,W,lp,epochs);
y = ann_FF_run(x,N,W)
disp(y)
But i receive an error:
-->exec('D:\Учёба\Задачи\Recognition.sce', -1)
!--error 15
Подматрица задана некорректно (Submatrix is incorrect).
at line 37 of function ann_FF_grad_BP called by :
at line 25 of function ann_FF_Std_batch called by :
W = ann_FF_Std_batch(x,t,N,W,lp,epochs);
at line 33 of exec file called by :
exec('D:\Учёба\Задачи\Recognition.sce', -1)
An error may be in T and U matrix, but i don't understand why. Could you tell be what i do wrong? Thank you!
You've made 2 errors in your code:
You should not mix testing and training sets.
Testing input must be a single column.
Your first error was x = [ 1.... ] because it contained a single image, whereas you specified in N that you had two output neurons.
As stated in the example, you should have x = [T,U];
Your second error was to give x as a test to ann_FF_run. This function takes test input as a single column. But since you trained your NN with x just before it was a 5x7 matrix. Just change it to a column vector.
Here a corrected and commented code :
T = [...
1 1 1 1 1 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
]';
U = [...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
0 1 1 1 0 ...
]';
// setting the traing set of two image
xtrain = [T,U];
// so each image as 35 pixels so N(1) is 35
// and we have two images so N($) is 2
N = [35 10 2];
// training the NN
W = ann_FF_init(N);
// The expected response for T : 1 for T, 0 for U
t_t = [1 0]';
// The expected response for T : 1 for T, 0 for U
t_u = [0 1]';
// the overall response
t = [t_t, t_u];
// some parameters
lp = [0.01, 1e-4];
epochs = 3000;
// getting the weight of the trained NN
W = ann_FF_Std_batch(xtrain,t,N,W,lp,epochs);
// testing the traing set.
y = ann_FF_run(xtrain,N,W)
disp('Testing the traing set')
disp(y) //should get something close to t ~ [1 0 ; 0 1]
// testing a distord U
xtest1 = matrix([1, 0, 0, 0, 1;
1, 1, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
0, 1, 1, 1, 1]',-1,1);
y = ann_FF_run(xtest1,N,W)
disp('Testing a distored U')
disp(y) //should get something close to t_u ~ [0 1]
//testing something different from T and U. should get nothing
xtest2 = matrix([1, 0, 0, 0, 1;
1, 1, 0, 0, 1;
1, 0, 1, 0, 1;
1, 0, 1, 0, 1;
0, 0, 1, 1, 1;
0, 0, 1, 0, 1;
0, 1, 1, 1, 1]',-1,1);
y = ann_FF_run(xtest2,N,W)
disp('Testing something neither T nor U')
disp(y)
and the output from scilab's console
Testing the traing set
0.8538757 0.1075397
0.1393287 0.8957439
Testing a distored U
0.1078667
0.9007755
Testing something neither T nor U
0.3433933
0.6306797
I should find all paths with a graph (24 nodes and 42 vertices). My starting nodes are 1, 2 or 3, and the final nodes are 10, 12, 13, 16, 17, 18, 20, 21, 22 and the rest are intermediate nodes. The sparse adjacency matrix of my graphs A is as follow.
I found the following Matlab code to find the all paths from a starting point to a target point, but the problem is that for example if the starting point is 1, we should not have node 2 in path. In the other words, only one starting point should appear in a path. Can anyone help me with this?
function paths = findpaths(Adj, nodes, currentPath, start, target)
paths = {};
nodes(start) = 0;
currentPath = [currentPath start];
childAdj = Adj(start,:) & nodes;
childList = find(childAdj);
childCount = numel(childList);
if childCount == 0 || start == target
if start == target
paths = [paths; currentPath];
end
return;
end
for idx = 1:childCount
currentNode = childList(idx);
newNodes = nodes;
newNodes(currentNode) = 0;
newPaths = findpaths(Adj, newNodes, currentPath, currentNode, target);
paths = [paths; newPaths];
end
end
Example graph:
A =
(4,1) 1
(5,1) 1
(9,1) 1
(10,1) 1
(12,1) 1
(5,2) 1
(7,2) 1
(8,2) 1
(8,3) 1
(1,4) 1
(5,4) 1
(6,4) 1
(9,4) 1
(15,4) 1
(1,5) 1
(2,5) 1
(4,5) 1
(14,5) 1
(17,5) 1
(4,6) 1
(16,6) 1
(19,6) 1
(20,6) 1
(22,6) 1
(2,7) 1
(20,7) 1
(23,7) 1
(2,8) 1
(3,8) 1
(23,8) 1
(1,9) 1
(4,9) 1
(13,9) 1
(1,10) 1
(12,10) 1
(13,10) 1
(14,11) 1
(17,11) 1
(1,12) 1
(10,12) 1
(16,12) 1
(18,12) 1
(9,13) 1
(10,13) 1
(16,13) 1
(18,13) 1
(5,14) 1
(11,14) 1
(17,14) 1
(4,15) 1
(6,16) 1
(12,16) 1
(13,16) 1
(18,16) 1
(5,17) 1
(11,17) 1
(14,17) 1
(12,18) 1
(13,18) 1
(16,18) 1
(19,18) 1
(21,18) 1
(22,18) 1
(6,19) 1
(18,19) 1
(21,19) 1
(6,20) 1
(7,20) 1
(18,21) 1
(19,21) 1
(24,21) 1
(6,22) 1
(19,22) 1
(24,22) 1
(7,23) 1
(8,23) 1
(19,24) 1
(21,24) 1
(22,24) 1
The call to setsockopt seems to succeed, but checking with getsockopt shows that TCP_NODELAY has not been set properly.
Below is the relevant section of my strace.
Am I missing something?
[00007fd101b10327] socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 4
[00007fd102c6d520] connect(4, {sa_family=AF_INET, sin_port=htons(31695), sin_addr=inet_addr("[removed]")}, 16) = 0
[00007fd102c6d1e0] write(3, "110816.344860 [I.COMM_NET] TCP c"..., 155) = 155
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b102ca] setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b102ca] setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b0ff8a] getsockopt(4, SOL_TCP, TCP_NODELAY, "", [0]) = 0
The strace output isn't telling you anything about the value of the TCP_NODELAY option. What it is telling you is that you are calling getsockopt with a zero-length buffer for the response. The kernel will not be able to return any useful information when the length of the buffer is zero.
In the output [0] is an indication of the size of the buffer you gave it. You need to specify a buffer size which is at least the size of an integer in order to get a useful response back.