I'm trying to set up a lustre client (docs) inside a docker container running on BlueData.
As per this post, I've modified the BlueData config on each worker and the controller node:
$ vi /opt/bluedata/common-install/bd_mgmt/releases/1/sys.config
I added the SYS_ADMIN capability:
{allowed_docker_caps, ["SETPCAP",
"SYS_ADMIN",
...
And rebooted the host.
Next, I provisioned a Centos 7.x cluster in BlueData:
CentOS 7.x with no pre-packaged apps or software
Image Version: 2.2
Distro ID: bluedata/centos7
Then I ssh'd into the Centos container:
$ ssh -o StrictHostKeyChecking=no -i /Users/me/.ssh/id_rsa centos@x.x.x.x
Inside the container, I install the lustre client:
sudo yum install \
kernel \
kernel-devel \
kernel-headers \
kernel-abi-whitelists \
kernel-tools \
kernel-tools-libs \
kernel-tools-libs-devel
cat >/tmp/lustre-repo.conf <<\__EOF
[lustre-server]
name=lustre-server
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/server
gpgcheck=0
[lustre-client]
name=lustre-client
baseurl=https://downloads.whamcloud.com/public/lustre/latest-release/el7/client
gpgcheck=0
[e2fsprogs-wc]
name=e2fsprogs-wc
baseurl=https://downloads.whamcloud.com/public/e2fsprogs/latest/el7
gpgcheck=0
__EOF
sudo mv /tmp/lustre-repo.conf /etc/yum.repos.d/lustre.repo
sudo reboot
sudo yum install epel-release
sudo yum --nogpgcheck --enablerepo=lustre-client install lustre-client-dkms lustre-client
sudo reboot
However, I receive an error when I try to load the lustre module:
$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
I have checked the kernel version:
[bluedata@bluedata-2 ~]$ uname -a
Linux bluedata-2.bdlocal 3.10.0-957.21.3.el7.x86_64 #1 SMP Tue Jun 18 16:35:19 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux
The lustre version I installed is 2.12:
kmod-lustre-client.x86_64 2.12.2-1.el7 @lustre-client
lustre-client.x86_64 2.12.2-1.el7 @lustre-client
Update 1
No errors are shown with dmesg:
[bluedata@bluedata-3 ~]$ dmesg -c
[bluedata@bluedata-3 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz
modprobe: ERROR: could not insert 'lustre': Operation not permitted
[bluedata@bluedata-3 ~]$ dmesg
Update 2
$ sudo strace modprobe lustre
Outputs:
execve("/sbin/modprobe", ["modprobe", "lustre"], [/* 16 vars */]) = 0
brk(NULL) = 0x1648000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff2000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=22387, ...}) = 0
mmap(NULL, 22387, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fec000
close(3) = 0
open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2000\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=157424, ...}) = 0
mmap(NULL, 2249352, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458bac000
mprotect(0x7f4458bd1000, 2093056, PROT_NONE) = 0
mmap(0x7f4458dd0000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x24000) = 0x7f4458dd0000
close(3) = 0
open("/lib64/libz.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20!\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=90248, ...}) = 0
mmap(NULL, 2183272, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458996000
mprotect(0x7f44589ab000, 2093056, PROT_NONE) = 0
mmap(0x7f4458baa000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458baa000
close(3) = 0
open("/lib64/libgcc_s.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220*\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=88776, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458feb000
mmap(NULL, 2184192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458780000
mprotect(0x7f4458795000, 2093056, PROT_NONE) = 0
mmap(0x7f4458994000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f4458994000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240%\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2151672, ...}) = 0
mmap(NULL, 3981792, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f44583b3000
mprotect(0x7f4458575000, 2097152, PROT_NONE) = 0
mmap(0x7f4458775000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c2000) = 0x7f4458775000
mmap(0x7f445877b000, 16864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f445877b000
close(3) = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260l\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=141968, ...}) = 0
mmap(NULL, 2208904, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4458197000
mprotect(0x7f44581ae000, 2093056, PROT_NONE) = 0
mmap(0x7f44583ad000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f44583ad000
mmap(0x7f44583af000, 13448, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f44583af000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fea000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458fe8000
arch_prctl(ARCH_SET_FS, 0x7f4458fe8740) = 0
mprotect(0x7f4458775000, 16384, PROT_READ) = 0
mprotect(0x7f44583ad000, 4096, PROT_READ) = 0
mprotect(0x7f4458994000, 4096, PROT_READ) = 0
mprotect(0x7f4458baa000, 4096, PROT_READ) = 0
mprotect(0x7f4458dd0000, 4096, PROT_READ) = 0
mprotect(0x621000, 4096, PROT_READ) = 0
mprotect(0x7f4458ff3000, 4096, PROT_READ) = 0
munmap(0x7f4458fec000, 22387) = 0
set_tid_address(0x7f4458fe8a10) = 1264
set_robust_list(0x7f4458fe8a20, 24) = 0
rt_sigaction(SIGRTMIN, {0x7f445819d790, [], SA_RESTORER|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7f445819d820, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f44581a65d0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
brk(NULL) = 0x1648000
brk(0x1669000) = 0x1669000
brk(NULL) = 0x1669000
uname({sysname="Linux", nodename="bluedata-3.bdlocal", ...}) = 0
stat("/etc/modprobe.d", {st_mode=S_IFDIR|0755, st_size=54, ...}) = 0
openat(AT_FDCWD, "/etc/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 4 entries */, 32768) = 128
newfstatat(3, "dccp-blacklist.conf", {st_mode=S_IFREG|0644, st_size=215, ...}, 0) = 0
newfstatat(3, "ko2iblnd.conf", {st_mode=S_IFREG|0644, st_size=999, ...}, 0) = 0
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
stat("/run/modprobe.d", 0x7ffcc1e0a640) = -1 ENOENT (No such file or directory)
stat("/lib/modprobe.d", {st_mode=S_IFDIR|0755, st_size=6, ...}) = 0
openat(AT_FDCWD, "/lib/modprobe.d", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 3
getdents(3, /* 2 entries */, 32768) = 48
getdents(3, /* 0 entries */, 32768) = 0
close(3) = 0
open("/etc/modprobe.d/dccp-blacklist.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=215, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# DCCP is considered a potential"..., 4096) = 215
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/etc/modprobe.d/ko2iblnd.conf", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=999, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Currently it isn't possible to"..., 4096) = 999
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.softdep", O_RDONLY|O_CLOEXEC) = 3
fcntl(3, F_GETFL) = 0x8000 (flags O_RDONLY|O_LARGEFILE)
fstat(3, {st_mode=S_IFREG|0644, st_size=518, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4458ff1000
read(3, "# Soft dependencies extracted fr"..., 4096) = 518
read(3, "", 4096) = 0
close(3) = 0
munmap(0x7f4458ff1000, 4096) = 0
open("/proc/cmdline", O_RDONLY|O_CLOEXEC) = 3
read(3, "BOOT_IMAGE=/boot/vmlinuz-3.10.0-"..., 4095) = 193
read(3, "", 3902) = 0
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.dep.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=382199, ...}) = 0
mmap(NULL, 382199, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458f8a000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.alias.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=802187, ...}) = 0
mmap(NULL, 802187, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458ec6000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.symbols.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=537967, ...}) = 0
mmap(NULL, 537967, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458e42000
close(3) = 0
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/modules.builtin.bin", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=9332, ...}) = 0
mmap(NULL, 9332, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4458fef000
close(3) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz", {st_mode=S_IFREG|0644, st_size=58688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz", {st_mode=S_IFREG|0644, st_size=81772, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz", {st_mode=S_IFREG|0644, st_size=11592, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz", {st_mode=S_IFREG|0644, st_size=133688, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz", {st_mode=S_IFREG|0644, st_size=101472, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz", {st_mode=S_IFREG|0644, st_size=14600, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz", {st_mode=S_IFREG|0644, st_size=369448, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/obdclass.ko.xz", {st_mode=S_IFREG|0644, st_size=270652, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lnet.ko.xz", {st_mode=S_IFREG|0644, st_size=174800, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/extra/libcfs.ko.xz", {st_mode=S_IFREG|0644, st_size=88252, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", {st_mode=S_IFREG|0644, st_size=2028, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/lustre/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/lustre", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
stat("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_common.ko.xz", {st_mode=S_IFREG|0644, st_size=2004, ...}) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_pclmul/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_common/initstate", O_RDONLY|O_CLOEXEC) = 3
read(3, "live\n", 31) = 5
read(3, "", 26) = 0
close(3) = 0
open("/sys/module/crct10dif_generic/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/sys/module/crct10dif_generic", 0x7ffcc1e0a5c0) = -1 ENOENT (No such file or directory)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/crypto/crct10dif_generic.ko.xz", O_RDONLY|O_CLOEXEC) = 3
read(3, "\3757zXZ\0", 6) = 6
lseek(3, 0, SEEK_SET) = 0
read(3, "\3757zXZ\0\0\4\346\326\264F\2\0!\1\26\0\0\0t/\345\243\340\30l\6\267]\0?"..., 8192) = 1784
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4457996000
read(3, "", 8192) = 0
munmap(0x7f4457996000, 8392704) = 0
init_module(0x1653f40, 6253, "") = -1 ENOSYS (Function not implemented)
open("/sys/module/crc_t10dif/initstate", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
stat("/sys/module/crc_t10dif", 0x7ffcc1e0a5c0) = -1 ENOSYS (Function not implemented)
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e0b5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1e7d000) = -1 ENOSYS (Function not implemented)
read(4, 0x7ffcc1e074e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f4458f8a000, 382199) = 0
munmap(0x7f4458ec6000, 802187) = 0
munmap(0x7f4458e42000, 537967) = 0
munmap(0x7f4458fef000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 3
I tried installing the kmod package instead of dkms:
Running transaction
Installing : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/null': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/kmsg': Operation not permitted
mknod: '/var/tmp/dracut.cG1SKj/initramfs/dev/console': Operation not permitted
Verifying : kmod-lustre-client-2.12.2-1.el7.x86_64 1/1
Installed:
kmod-lustre-client.x86_64 0:2.12.2-1.el7
Complete!
I then tried again sudo strace modprobe lustre:
...
open("/lib/modules/3.10.0-957.21.3.el7.x86_64/kernel/lib/crc-t10dif.ko.xz", O_RDONLY|O_CLOEXEC) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450be5f0, 6) = -1 ENOSYS (Function not implemented)
lseek(4, 0, SEEK_SET) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 ENOSYS (Function not implemented)
brk(NULL) = -1 ENOSYS (Function not implemented)
brk(0x1410000) = -1 ENOSYS (Function not implemented)
read(4, 0x7fff450ba4e0, 8192) = -1 EPERM (Operation not permitted)
close(3) = 0
write(2, "modprobe: ERROR: could not inser"..., 68modprobe: ERROR: could not insert 'lustre': Operation not permitted
) = 68
close(4) = 0
munmap(0x7f04da388000, 383873) = 0
munmap(0x7f04da2c4000, 802187) = 0
munmap(0x7f04da240000, 537967) = 0
munmap(0x7f04da3ed000, 9332) = 0
exit_group(1) = ?
+++ exited with 1 +++
Update 4
Running the container as --privileged has resolved the original error, but I now hit a new error:
[bluedata@bluedata-5 ~]$ sudo dmesg -c
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
modprobe: ERROR: could not insert 'lustre': Invalid argument
[bluedata@bluedata-5 ~]$ dmesg
[ 2072.258326] LNetError: 56638:0:(api-ni.c:2233:lnet_startup_lndnet()) Can't load LND tcp, module ksocklnd, rc=256
[ 2072.264113] LustreError: 56638:0:(events.c:625:ptlrpc_init_portals()) network initialisation failed
Update 5
The error message suggested I needed to configure the network, so I tried:
[bluedata@bluedata-5 ~]$ sudo modprobe lnet
[bluedata@bluedata-5 ~]$ sudo lnetctl lnet configure
lustre now loads without error:
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
I followed the original steps in the question and ran the container as --privileged. Then loading and configuring lnet allowed the lustre module to load without error:
[bluedata@bluedata-5 ~]$ sudo modprobe lnet
[bluedata@bluedata-5 ~]$ sudo lnetctl lnet configure
[bluedata@bluedata-5 ~]$ sudo modprobe -v lustre
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/ptlrpc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fld.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lov.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/osc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/fid.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/mdc.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lmv.ko.xz
insmod /lib/modules/3.10.0-957.21.3.el7.x86_64/extra/lustre.ko.xz
[bluedata@bluedata-5 ~]$
IMPORTANT NOTE: Running with the privileged flag is not recommended. There are other options - reach out to your local BlueData team to learn more.
I am trying to run this example, which uses the ANN Toolbox for Scilab:
https://burubaxair.wordpress.com/2014/03/12/artificial-neural-networks-in-scilab/
This is code:
T = [
1 1 1 1 1
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
0 0 1 0 0
]';
U = [
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
1 0 0 0 1
0 1 1 1 0
]';
N = [35 10 2];
W = ann_FF_init(N);
x = [1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
0, 1, 1, 1, 0]';
t_t = [1 0]';
t_u = [0 1]';
t = [t_t, t_u];
lp = [0.01, 1e-4];
epochs = 3000;
W = ann_FF_Std_batch(x,t,N,W,lp,epochs);
y = ann_FF_run(x,N,W)
disp(y)
But I receive an error:
-->exec('D:\Учёба\Задачи\Recognition.sce', -1)
!--error 15
Подматрица задана некорректно (Submatrix is incorrect).
at line 37 of function ann_FF_grad_BP called by :
at line 25 of function ann_FF_Std_batch called by :
W = ann_FF_Std_batch(x,t,N,W,lp,epochs);
at line 33 of exec file called by :
exec('D:\Учёба\Задачи\Recognition.sce', -1)
The error may be in the T and U matrices, but I don't understand why. Could you tell me what I am doing wrong? Thank you!
You've made 2 errors in your code:
You should not mix testing and training sets.
Testing input must be a single column.
Your first error was x = [ 1.... ] because it contained a single image, whereas you specified in N that you had two output neurons.
As stated in the example, you should have x = [T,U];
Your second error was passing x as the test input to ann_FF_run. This function takes each test input as a single column, but the x you had just trained your NN with was a 5x7 matrix. Just change it to a column vector.
Here is the corrected and commented code:
T = [...
1 1 1 1 1 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
0 0 1 0 0 ...
]';
U = [...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
1 0 0 0 1 ...
0 1 1 1 0 ...
]';
// setting the training set of two images
xtrain = [T,U];
// each image has 35 pixels, so N(1) is 35
// and we have two images, so N($) is 2
N = [35 10 2];
// training the NN
W = ann_FF_init(N);
// The expected response for T : 1 for T, 0 for U
t_t = [1 0]';
// The expected response for U : 0 for T, 1 for U
t_u = [0 1]';
// the overall response
t = [t_t, t_u];
// some parameters
lp = [0.01, 1e-4];
epochs = 3000;
// getting the weight of the trained NN
W = ann_FF_Std_batch(xtrain,t,N,W,lp,epochs);
// testing the training set.
y = ann_FF_run(xtrain,N,W)
disp('Testing the traing set')
disp(y) //should get something close to t ~ [1 0 ; 0 1]
// testing a distorted U
xtest1 = matrix([1, 0, 0, 0, 1;
1, 1, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
1, 0, 0, 0, 1;
0, 1, 1, 1, 1]',-1,1);
y = ann_FF_run(xtest1,N,W)
disp('Testing a distored U')
disp(y) //should get something close to t_u ~ [0 1]
//testing something different from T and U. should get nothing
xtest2 = matrix([1, 0, 0, 0, 1;
1, 1, 0, 0, 1;
1, 0, 1, 0, 1;
1, 0, 1, 0, 1;
0, 0, 1, 1, 1;
0, 0, 1, 0, 1;
0, 1, 1, 1, 1]',-1,1);
y = ann_FF_run(xtest2,N,W)
disp('Testing something neither T nor U')
disp(y)
and the output from scilab's console
Testing the traing set
0.8538757 0.1075397
0.1393287 0.8957439
Testing a distored U
0.1078667
0.9007755
Testing something neither T nor U
0.3433933
0.6306797
Let's say I have an array:
A = [0, -2, 0, 0, -3, 0, -1, 0];
And I want to see if it can fit any of the patterns below:
B = [1, 1, 1 , 0, 0 , 0 , 0, 0,];
C= [1, 1, 0 , 1, 0 , 0 , 0, 0,]
D= [0, 1, 0 , 1, 0 , 1 , 0, 0];
Where 1 means that the number is unique and 0 means that the number remains the same until another 1 is met but is different from the number before. Here are some examples:
[-3, -2, -1, 0, 0, 0, 0, 0]; --- A matches B.
[-3, -2, -1, -1, 0, 0, 0, 0]; -- This matches C
[-3, -3, 3, 3, -2, -2, 0, 0]; -- This matches D
Is there a Matlab function for this, or must I come up with my own approach? Any advice is welcome; I am very new to Matlab.
There is something wrong about your rule, you seem to treat 0 as a special case (i.e. it is not a number or something).
you could just do something like this:
A=[-3, -2, -1, -1, 0, 0, 0, 0];
[ia ib] = unique(A);
R = zeros(1,8);
R(ib) = 1
>> R =
1 1 0 1 0 0 0 1
and match against this. This assumes that you treat 0 as an ordinary number, just as you state in the rule.
If you want 0 to be a special case, you need to:
A=[-3, -2, -1, -1, 0, 0, 0, 0];
[ia ib] = unique(A);
ib(ia==0)=[];
R = zeros(1,8);
R(ib) = 1
>> R =
1 1 0 1 0 0 0 0
and simply match this vector to your B,C,D etc. The second method matches your desired answer, but does not match the rule you state. The first method matches the rule you state but not your desired output.
=============EDIT============
I am on R2010b, and at some point since then the way unique works changed; you now need to add the 'legacy' flag if you are using any version later than R2012b.
Sorry I forgot to mention this:
just change it to :
[ia ib] = unique(A,'legacy');
and it should work fine.
The call to setsockopt seems to succeed, but checking with getsockopt shows that TCP_NODELAY has not been set properly.
Below is the relevant section of my strace.
Am I missing something?
[00007fd101b10327] socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 4
[00007fd102c6d520] connect(4, {sa_family=AF_INET, sin_port=htons(31695), sin_addr=inet_addr("[removed]")}, 16) = 0
[00007fd102c6d1e0] write(3, "110816.344860 [I.COMM_NET] TCP c"..., 155) = 155
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b102ca] setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b102ca] setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_BLOCK, [CHLD], [PIPE], 8) = 0
[00007fd101a4e75d] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[00007fd101a4e880] rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
[00007fd101ad6470] nanosleep({1, 0}, 0x7fff66f9df40) = 0
[00007fd101b0ff8a] getsockopt(4, SOL_TCP, TCP_NODELAY, "", [0]) = 0
The strace output isn't telling you anything about the value of the TCP_NODELAY option. What it is telling you is that you are calling getsockopt with a zero-length buffer for the response. The kernel will not be able to return any useful information when the length of the buffer is zero.
In the output [0] is an indication of the size of the buffer you gave it. You need to specify a buffer size which is at least the size of an integer in order to get a useful response back.