eBPF Program Not Capturing Syscall Argument - ebpf

I have the following eBPF program configured to get information about openat calls.
from bcc import BPF
from bcc.utils import printb
import sys
BPF_SOURCE_CODE = r"""
BPF_PERF_OUTPUT(events);
#define MAX_FILENAME 100
struct data_t {
int dirfd;
int flags;
int mode;
char filename[MAX_FILENAME+1];
};
TRACEPOINT_PROBE(syscalls, sys_enter_openat) {
struct data_t data = {};
data.dirfd = args->dfd;
data.flags = args->flags;
data.mode = args->mode;
bpf_probe_read_user(&data.filename, sizeof(data.filename), args->filename);
events.perf_submit(args, &data, sizeof(data));
return 0;
}
"""
bpf = BPF(text = BPF_SOURCE_CODE)
def handle_event(cpu, data, size):
output = bpf["events"].event(data)
print(output.filename)
bpf["events"].open_perf_buffer(handle_event)
while True:
try:
bpf.perf_buffer_poll()
except KeyboardInterrupt:
print()
sys.exit(0)
I wrote the following test program to verify that the eBPF program works correctly.
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
int main()
{
int fd;
fd = open("/tmp/test.txt", O_RDONLY);
fd = open("/tmp/test1.txt", O_RDONLY);
fd = open("/tmp/test2.txt", O_RDONLY);
}
The eBPF program reliably captures the filenames for the second and third open calls, but never captures the filename for the first call:
$ sudo python3 listen.py
<-- snip -->
b''
b'/tmp/test1.txt'
b'/tmp/test2.txt'
Any ideas on why the eBPF program isn't getting information about the filename for the first open call?

Related

Stuck with netlink, connektor, socket and co

I'm completely new to netlink & co. and I am trying to establisch a connection from user space to the w1-kernel module of a raspberry pi.
Unfortunately the documentation i found is spotty and contradictory.
Here some of the things not clear to me:
basic communication is:
generate a socket: socket()
int s = socket(AF_NETLINK,SOCK_DGRAM, NETLINK_CONNECTOR);
bind it to a local name: bind()
int b = bind(s,(sockaddr*)&sa,sizeof(sa));
with
sa.nl_family=AF_NETLINK;
sa.nl_pid=getpid();//0?
sa.nl_groups=0; //23? -1?
create the message
send it: send()? sendmsg()?
wait for answer: poll()
read answer: recv()
+In examples i found (w1d.c and ucon.c) they use the send() command (not sendmsg) without a connect(), even though the man pages of send say that wouldnt work.
+I am not clear about the structure of the message:
send can send any buffer (char*)
netlink expects a struct nlmsghdr header;
connector expects a struct cn_msg header.
w1_netlink expects a w1_netlink_msg header and w1_netlink_cmd data.
Do i need all headers in a row? Ther are 2 sequence / message number variables, one in nlmsghdr and on in cn_msg???
The test program i wrote is not producing the result i expect: every thing works withour producing an error but i am getting no answer :-(
#include <iostream>
#include <linux/netlink.h>
#include <sys/types.h>
#include <sys/socket.h>
#include<sys/poll.h>
#include <unistd.h>
#include<cstring>
#include "w1_netlink.h"
__u32 nl_seq;
static int netlink_send(int s, struct cn_msg *msg) //copy from (ucon.c)
{
struct nlmsghdr *nlh;
unsigned int size;
int err;
char buf[128];
struct cn_msg *m;
size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
nlh = (struct nlmsghdr *)buf;
nlh->nlmsg_seq = nl_seq++;
nlh->nlmsg_pid = getpid();
nlh->nlmsg_type = NLMSG_DONE;
nlh->nlmsg_len = size;
nlh->nlmsg_flags = 0;
m = (cn_msg*) NLMSG_DATA(nlh);
memcpy(m, msg, sizeof(*m) + msg->len);
err = send(s, nlh, size, 0);
return err;
}
int main(int argc, char *argv[])
{
nl_seq=0;
int s = socket(AF_NETLINK,SOCK_DGRAM, NETLINK_CONNECTOR);
if(s==-1) {std::cout<<"no socket"; return s;};
std::cout<<"socket "<<s;
sockaddr_nl sa;
sa.nl_family=AF_NETLINK;
sa.nl_pid=0;//getpid();
sa.nl_groups=0;
int b = bind(s,(sockaddr*)&sa,sizeof(sa));
if(b==-1){std::cout<<"bind error";return b;}; //prints 3
std::cout<<"bind "<<b; //prints 0
int si=sizeof(struct cn_msg)+sizeof(struct w1_netlink_msg)+sizeof(w1_netlink_cmd);
char * buf;
buf=(char *)malloc(1024);
memset(buf,0,1024);
cn_msg *cnh = (cn_msg*)buf;
w1_netlink_msg* wnh=(w1_netlink_msg*)&cnh->data;
w1_netlink_cmd* wcmd = (w1_netlink_cmd*)&wnh->data;
cnh->id.idx=CN_W1_IDX;
cnh->id.val=CN_W1_VAL;
cnh->seq=nl_seq;
cnh->flags=0;
wnh->type=W1_LIST_MASTERS;
wnh->len=0;
cnh->len=sizeof(struct w1_netlink_msg)+sizeof(w1_netlink_cmd);
int len=netlink_send(s,cnh);
std::cout<<"send "<<len<<" "<<(int)wnh->status; //prints 52 0
pollfd pfd;
pfd.fd=s;
pfd.events=POLLIN;
pfd.revents=0;
int p=0;
while(p<1) {
p=poll(&pfd,1,1000);
std::cout<<"poll "<<p<<pfd.revents; //prints 0 0 in infinite loop
std::cout.flush();
};
memset(wcmd,0,128);
len=recv(s,buf,255,0);
std::cout<<"recv "<<len;
close(s);
return 0;
}
Result is socket 3 bind 0 send 52 0 poll 00 poll 00 ...
Thanks

why setuid fails after capset is used?

trying to figure out the linux capabilities interface, i came across with an unexpected issue (for me at least). When seting the capabilities of a process with the capset syscall the kernel rejects a change of userid with the setuid syscall. Does anybody know why setuid fails?
This is code i wrote to test this behavior:
#undef _POSIX_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/capability.h>
#include <sys/capability.h>
#include <string.h>
int main(int argc, char** argv){
struct __user_cap_header_struct cap_header;
struct __user_cap_data_struct cap_data;
int cap_res;
FILE *file;
int sockfd;
cap_header.pid = getpid();
cap_header.version = _LINUX_CAPABILITY_VERSION_1;
__u32 cap_mask = 0;
cap_mask |= (1 << CAP_DAC_OVERRIDE);
cap_mask |= (1 << CAP_SETUID);
printf("You selected mask: %x\n", cap_mask);
cap_data.effective = cap_mask;
cap_data.permitted = cap_mask;
cap_data.inheritable = cap_mask;
cap_res = capset(&cap_header, &cap_data);
if(cap_res < 0){
printf("Trying to apply mask: FAIL\n", cap_mask);
} else {
printf("Capability set correctly\n");
}
int uid = atol(argv[1]);
int setuid_res = setuid(uid);
if (setuid_res == -1){
printf("7w7\n");
} else {
printf("UID set correctly\n");
}
}
compiled with:
$ gcc -g test1.c -o test1
Output is (for user id: 1000)
$ # ./test1 1000
You selected mask: 2
Capability set correctly
7w7
I think you might be missing a couple of steps in your question:
How do you give the binary some privilege?
It looks like you are trying to use cap_dac_override to achieve what cap_setuid is intended for.
Rewriting the program as follows:
#undef _POSIX_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/capability.h>
#include <sys/capability.h>
#include <string.h>
int main(int argc, char** argv){
struct __user_cap_header_struct cap_header;
struct __user_cap_data_struct cap_data;
int cap_res;
// need to start from known data. C does not guarantee these are
// zero filled by default. You could declare them static to get
// that.
memset(&cap_header, 0, sizeof(cap_header));
memset(&cap_data, 0, sizeof(cap_data));
cap_header.pid = getpid();
cap_header.version = _LINUX_CAPABILITY_VERSION_1;
__u32 cap_mask = 0;
cap_mask |= (1 << CAP_SETUID);
printf("You selected mask: %x\n", cap_mask);
cap_data.effective = cap_mask;
cap_data.permitted = cap_mask;
// not needed: cap_data.inheritable = cap_mask;
cap_res = capset(&cap_header, &cap_data);
if(cap_res < 0){
printf("Trying to apply mask: FAIL\n", cap_mask);
exit(1);
} else {
printf("Capability set correctly\n");
}
if (argc != 2) {
printf("usage: %s <uid>\n", argv[0]);
exit(1);
}
int uid = atol(argv[1]);
int setuid_res = setuid(uid);
if (setuid_res == -1){
printf("7w7\n");
} else {
printf("UID set correctly to %d\n", uid);
}
}
You can run the program like this:
$ sudo ./test1 1000
You selected mask: 80
Capability set correctly
UID set correctly to 1000
Or, using a file capability:
$ sudo setcap cap_setuid=p ./test1
$ ./test1 1000
You selected mask: 80
Capability set correctly
UID set correctly to 1000
This will work if you want to use the first 32 capabilities. However, there are ~40 of them under Linux at present, so I'd suggest you look into using the libcap API instead which figures out all of the kernel ABI details for you.

Behavior of select() on stdin when used on a pipe

I am trying to understand an observation on behavior of select() when used on stdin, when it is receiving data from a pipe.
Basically I had a simple C program using the following code:
hello.c:
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <termios.h>
int main(int argc, char *argv[])
{
int flags, opt;
int nsecs, tfnd;
fd_set rfds;
struct timeval tv;
int retval;
int stdin_fileno_p1 = STDIN_FILENO+1;
char c;
int n;
/* Turn off canonical processing on stdin*/
static struct termios oldt, newt;
tcgetattr( STDIN_FILENO, &oldt);
newt = oldt;
newt.c_lflag &= ~(ICANON);
tcsetattr( STDIN_FILENO, TCSANOW, &newt);
while (1)
{
FD_ZERO(&rfds);
FD_SET(STDIN_FILENO, &rfds);
tv.tv_sec = 0;
tv.tv_usec = 0;
retval = select(stdin_fileno_p1, &rfds, NULL, NULL, &tv);
if ( retval && (retval!=-1) )
{
n = read(STDIN_FILENO, &c, 1);
write(STDOUT_FILENO, &c, 1);
}
else printf("No Data\n");
usleep(100000);
}
tcsetattr( STDIN_FILENO, TCSANOW, &oldt);
}
If I ran the program as follows I could see characters echoing when I type keys on while the program is running. When keys are not pressed, it displays "No Data" as expected.
./hello
However, if use the program as follows, the program never gets to a state where is displays "No Data". Instead last character "c" is repeatedly displayed.
echo -n abc | ./hello
I'm a bit puzzled by this observation, and would be grateful if you could help me to understand the observed behavior.
The problem is that your program does not detect an end-of-file condition when it reads from the STDIN_FILENO descriptor. After echo has written the c character it will close its end of the pipe, which will cause the select in your program to return immediately and your read to return 0 as an indication that no more data will ever be available from that descriptor. Your program doesn't detect that condition. Instead it just calls write with whatever character was left in the buffer by the last successful read and then repeats the loop.
To fix, do if (n==0) break; after the read.

socket is not blocking on write operation: OpenSolaris

I have a unit test that checks behavior on blocking and non-blocking sockets - the server writes a long response and at some point it should not be able to write any more and it
blocks on write.
Basically one side writes and other side does not reads.
Under Solaris at some point I get a error "Not enough space" (after writing 75MB) instead of blocking on write:
Program that reproduces the problem:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
char const *address = "127.0.0.1";
#define check(x) do { if( (x) < 0) { perror(#x) ; exit(1); } } while(0)
int main()
{
signal(SIGPIPE,SIG_IGN);
struct sockaddr_in inaddr = {};
inaddr.sin_family = AF_INET;
inaddr.sin_addr.s_addr = inet_addr(address);
inaddr.sin_port = htons(8080);
int res = fork();
if(res < 0) {
perror("fork");
exit(1);
}
if(res > 0) {
int fd = -1;
int status;
sleep(1);
check(fd = socket(AF_INET,SOCK_STREAM,0));
check(connect(fd,(sockaddr*)&inaddr,sizeof(inaddr)));
sleep(5);
close(fd);
wait(&status);
return 0;
}
else {
int acc,fd;
check(acc = socket(AF_INET,SOCK_STREAM,0));
int yes = 1;
check(setsockopt(acc,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)));
check(bind(acc,(sockaddr*)&inaddr,sizeof(inaddr)));
check(listen(acc,10));
check(fd = accept(acc,0,0));
char buf[1000];
long long total= 0;
do {
int r = send(fd,buf,sizeof(buf),0);
if(r < 0) {
printf("write %s\n",strerror(errno));
return 0;
}
else if(r==0) {
printf("Got eof\n");
return 0;
}
total += r;
if(total > 100*1024*1024) {
printf("Too much!!!!\n");
return 0;
}
printf("%lld\n",total);
}while(1);
}
return 0;
}
The output on Solaris (last two lines)
75768000
write Not enough space
The expected output on Linux (last two lines)
271760
write Connection reset by peer
Which happens only when the other side closes the socket.
Any ideas why and how can I fix it, what options to set?
P.S.: It is OpenSolaris 2009.06, x86
Edits
Added full C code that reproduces the problem
Answer:
This seems like a bug in specific version of Solaris kernel, libc library.
From OpenSolaris source code, I'm afraid the SO_SNDTIMEO option is unsupported: https://hg.java.net/hg/solaris~on-src/file/tip/usr/src/uts/common/inet/sockmods/socksctp.c#l1233
If you want to block if there's no space available, you need to write code to do that.
POSIX is pretty clear that write on a socket is equivalent to send with no options, and that send "may fail if ... [i]nsufficient resources were available in the system to perform the operation."

a program that allocates huge chunks of memory using mmap(say 1GB) [duplicate]

I am writing a program that allocates huge chunks of memory using mmap and then accesses random memory locations to read and write into it.
I just tried out the following code:
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
int main() {
int fd,len=1024*1024;
fd=open("hello",O_READ);
char*addr=mmap(0,len,PROT_READ+PROT_WRITE,MAP_SHARED,fd,0);
for(fd=0;fd<len;fd++)
putchar(addr[fd]);
if (addr==MAP_FAILED) {perror("mmap"); exit(1);}
printf("mmap returned %p, which seems readable and writable\n",addr);
munmap(addr,len);
return 0;
}
But I cannot execute this program, is there anything wrong with my code?
First of all, the code won't even compile on my debian box. O_READ isn't a correct flag for open() as far as I know.
Then, you first use fd as a file descriptor and the you use it as a counter in your for loop.
I don't understand what you're trying to do, but I think you misunderstood something about mmap.
mmap is used to map a file into the memory, this way you can read / write to the created memory mapping instead of using functions to access the file.
Here's a short program that open a file, map it the the memory and print the returner pointer :
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main() {
int fd;
int result;
int len = 1024 * 1024;
fd = open("hello",O_RDWR | O_CREAT | O_TRUNC, (mode_t) 0600);
// stretch the file to the wanted length, writting something at the end is mandatory
result = lseek(fd, len - 1, SEEK_SET);
if(result == -1) { perror("lseek"); exit(1); }
result = write(fd, "", 1);
if(result == -1) { perror("write"); exit(1); }
char*addr = mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (addr==MAP_FAILED) { perror("mmap"); exit(1); }
printf("mmap returned %p, which seems readable and writable\n",addr);
result = munmap(addr, len);
if (result == -1) { perror("munmap"); exit(1); }
close(fd);
return 0;
}
I left out the for loop, since I didn't understood its purpose. Since you create a file and you want to map it on a given length, we have to "stretch" the file to the given length too.
Hope this helps.