Hi Linux networking experts,
I am trying to get a tool to monitor all sockets created by each process, and bandwidth used by each process. I could poll that information from /proc, but I would miss short-lived sockets that are created and destroyed between poll cycles.
The idea was to create a kernel module that registers a protocol handler with the networking subsystem, so that my handler function is called for each packet received. In the handler I wanted to look up the socket associated to the sk_buff, and the process that opened the socket. To get the processes waiting for the socket, I go through the wait queue for the socket and check the tasks in the list. I wrote this:
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <net/datalink.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/list.h>
#include <linux/ip.h>
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("xxx");
MODULE_AUTHOR("xxxx");
/* Receive hook; defined below and referenced by handler_packet_type. */
int prot_handler(struct sk_buff *skb, struct net_device *dev,
		 struct packet_type *pt, struct net_device *orig_dev);

/*
 * Registration record passed to dev_add_pack()/dev_remove_pack():
 * selects ETH_P_IP frames and routes them to prot_handler().
 */
static struct packet_type handler_packet_type __read_mostly = {
	.func = prot_handler,
	.type = cpu_to_be16(ETH_P_IP),
};
int
prot_handler(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
struct socket_wq *wq;
wait_queue_head_t *q;
struct task_struct * task;
//printk(KERN_ALERT "Got sk_buff.\n");
iph = ip_hdr(skb);
th = tcp_hdr(skb);
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
skb->skb_iif);
/* __inet_lookup_skb is crashing. It might be because skb_steal_sock?
*
* __inet_lookup_skb:
* skb_steal_sock
* __inet_lookup
* __inet_lookup_established
* __inet_lookup_listener
*/
if (!sk)
return 0;
//printk(KERN_ALERT "Found active sock.\n");
// code mimics sock_def_readable
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
q = &wq->wait;
if (wq_has_sleeper(wq)) {
// code mimics __wake_up_common
wait_queue_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
task = curr->private;
if (task && task->pid != 0)
printk(KERN_ALERT "Got packet for process ID: %d\n", task->pid);
}
}
}
}
rcu_read_unlock();
return 0;
}
/*
 * Module load hook: log a notice and register handler_packet_type with
 * dev_add_pack(). Always reports success (0).
 */
static int __init dev_init(void)
{
	printk(KERN_ALERT "Registering protocol handler with network stack.\n");

	dev_add_pack(&handler_packet_type);

	return 0;
}
/*
 * Module unload hook: log a notice and unregister handler_packet_type
 * with dev_remove_pack().
 */
static void __exit dev_exit(void)
{
	printk(KERN_ALERT "Removing protocol handler.\n");

	dev_remove_pack(&handler_packet_type);
}
/* Declare dev_init() and dev_exit() as the module's init and exit routines. */
module_init(dev_init);
module_exit(dev_exit);
I loaded this module and started an ssh session to the system to test it. The handler gets called when I type something on the remote system, but the PID printed doesn't correlate with what I expect, and the handler doesn't always get called. I think there might be a race condition with ip_rcv.
Apr 22 10:20:56 ol71node1 kernel: Got packet for process ID: 13927307
Apr 22 10:20:56 ol71node1 kernel: Got packet for process ID: 13927307
Apr 22 10:20:56 ol71node1 kernel: Got packet for process ID: 13927307
Can someone point to how I could do this, even if the use case doesn't make a lot of sense?
Thanks in advance.
Related
I have the following eBPF program configured to get information about openat calls.
from bcc import BPF
from bcc.utils import printb
import sys
# BPF C source handed to bcc.
#
# The filename is NOT read at syscall entry. A BPF program cannot fault pages
# in, so reading a user string whose page has not been touched yet fails and
# leaves the buffer empty (the symptom seen for the first open() of a process).
# Instead the entry probe only remembers the arguments, keyed by pid/tgid, and
# the exit probe copies the string after the syscall has run, then emits the
# same data_t record as before.
BPF_SOURCE_CODE = r"""
BPF_PERF_OUTPUT(events);
#define MAX_FILENAME 100
struct data_t {
    int dirfd;
    int flags;
    int mode;
    char filename[MAX_FILENAME+1];
};

/* openat() arguments remembered between syscall entry and exit. */
struct pending_t {
    const char *filename;
    int dirfd;
    int flags;
    int mode;
};
BPF_HASH(pending, u64, struct pending_t);

TRACEPOINT_PROBE(syscalls, sys_enter_openat) {
    u64 id = bpf_get_current_pid_tgid();
    struct pending_t p = {};
    p.filename = args->filename;
    p.dirfd = args->dfd;
    p.flags = args->flags;
    p.mode = args->mode;
    pending.update(&id, &p);
    return 0;
}

TRACEPOINT_PROBE(syscalls, sys_exit_openat) {
    u64 id = bpf_get_current_pid_tgid();
    struct pending_t *p = pending.lookup(&id);
    if (p == 0)
        return 0;   /* entry was not seen for this thread */
    struct data_t data = {};
    data.dirfd = p->dirfd;
    data.flags = p->flags;
    data.mode = p->mode;
    /* _str variant stops at the NUL and always terminates the buffer. */
    bpf_probe_read_user_str(&data.filename, sizeof(data.filename), (void *)p->filename);
    events.perf_submit(args, &data, sizeof(data));
    pending.delete(&id);
    return 0;
}
"""
# Hand the BPF C source above to bcc's BPF() constructor.
bpf = BPF(text = BPF_SOURCE_CODE)
def handle_event(cpu, data, size):
    """Perf-buffer callback: decode one "events" record and print its filename field."""
    event = bpf["events"].event(data)
    print(event.filename)
# Register the callback on the "events" perf buffer, then poll it until
# interrupted from the keyboard; on Ctrl-C print a blank line and exit 0.
bpf["events"].open_perf_buffer(handle_event)
try:
    while True:
        bpf.perf_buffer_poll()
except KeyboardInterrupt:
    print()
    sys.exit(0)
I wrote the following test program to verify that the eBPF program works correctly.
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
/*
 * Issue three read-only open() calls in a fixed order so that a tracer can
 * observe them. Each descriptor is closed again instead of being overwritten
 * and leaked; a failed open is reported on stderr and does not stop the run.
 */
int main(void)
{
	static const char *const paths[] = {
		"/tmp/test.txt",
		"/tmp/test1.txt",
		"/tmp/test2.txt",
	};
	size_t i;

	for (i = 0; i < sizeof paths / sizeof paths[0]; i++) {
		int fd = open(paths[i], O_RDONLY);

		if (fd < 0) {
			perror(paths[i]);
			continue;
		}
		close(fd);
	}
	return 0;
}
The eBPF program reliably captures the filenames for the second and third open calls, but never captures the filename for the first call:
$ sudo python3 listen.py
<-- snip -->
b''
b'/tmp/test1.txt'
b'/tmp/test2.txt'
Any ideas on why the eBPF program isn't getting information about the filename for the first open call?
Hey everyone, I'm trying to sniff packets using the pcap library. I have just one problem that I cannot figure out: ERROR: BPF program is not valid.
I'm trying to start sniffing, but this error is blocking me. I searched the web and found nothing.
My code is based on this program: https://github.com/levans248/packetSniffingAndSpoofing/blob/master/sniff.c
This is for SEED Labs. I know people do not like to help with homework, but I just need to figure out why this is happening; I have no clue.
#include <pcap.h>
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
/*
 * Capture callback passed to pcap_loop(): prints a fixed notice for every
 * packet. None of the three arguments is inspected.
 */
void got_packet(u_char *args, const struct pcap_pkthdr *header, const u_char *packet)
{
	fputs("Got a packet \n", stdout);
}
int main()
{
pcap_t *handle;
char errbuf[PCAP_ERRBUF_SIZE];
struct bpf_program fp;
char filter_exp[] = "ip proto icmp";
bpf_u_int32 net;
// Open live pcap session
handle = pcap_open_live("enp0s3", BUFSIZ, 1, 1000, errbuf);
// Compile Filter into the Berkeley Packet Filter (BPF)
pcap_compile(handle, &fp, filter_exp, 0, net);
if (pcap_setfilter(handle, &fp) == -1)
{
pcap_perror(handle, "ERROR");
exit(EXIT_FAILURE);
}
// Sniffing..
pcap_loop(handle, -1, got_packet, NULL);
pcap_close(handle);
return 0;
}
There was a syntax mistake in filter_exp.
I was working in C shell, so the expression needed to be changed to ip proto \icmp (inside a C string literal the backslash itself must be escaped: "ip proto \\icmp").
Thank you very much everyone !
I'm completely new to netlink & co. and I am trying to establish a connection from user space to the w1 kernel module of a Raspberry Pi.
Unfortunately the documentation I found is spotty and contradictory.
Here some of the things not clear to me:
basic communication is:
generate a socket: socket()
int s = socket(AF_NETLINK,SOCK_DGRAM, NETLINK_CONNECTOR);
bind it to a local name: bind()
int b = bind(s,(sockaddr*)&sa,sizeof(sa));
with
sa.nl_family=AF_NETLINK;
sa.nl_pid=getpid();//0?
sa.nl_groups=0; //23? -1?
create the message
send it: send()? sendmsg()?
wait for answer: poll()
read answer: recv()
+In the examples I found (w1d.c and ucon.c) they use send() (not sendmsg()) without a connect(), even though the man page for send() says that wouldn't work.
+I am not clear about the structure of the message:
send can send any buffer (char*)
netlink expects a struct nlmsghdr header;
connector expects a struct cn_msg header.
w1_netlink expects a w1_netlink_msg header and w1_netlink_cmd data.
Do I need all headers in a row? There are two sequence/message number variables, one in nlmsghdr and one in cn_msg?
The test program I wrote is not producing the result I expect: everything works without producing an error, but I am getting no answer :-(
#include <iostream>
#include <linux/netlink.h>
#include <sys/types.h>
#include <sys/socket.h>
#include<sys/poll.h>
#include <unistd.h>
#include<cstring>
#include "w1_netlink.h"
__u32 nl_seq;
static int netlink_send(int s, struct cn_msg *msg) //copy from (ucon.c)
{
struct nlmsghdr *nlh;
unsigned int size;
int err;
char buf[128];
struct cn_msg *m;
size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
nlh = (struct nlmsghdr *)buf;
nlh->nlmsg_seq = nl_seq++;
nlh->nlmsg_pid = getpid();
nlh->nlmsg_type = NLMSG_DONE;
nlh->nlmsg_len = size;
nlh->nlmsg_flags = 0;
m = (cn_msg*) NLMSG_DATA(nlh);
memcpy(m, msg, sizeof(*m) + msg->len);
err = send(s, nlh, size, 0);
return err;
}
int main(int argc, char *argv[])
{
nl_seq=0;
int s = socket(AF_NETLINK,SOCK_DGRAM, NETLINK_CONNECTOR);
if(s==-1) {std::cout<<"no socket"; return s;};
std::cout<<"socket "<<s;
sockaddr_nl sa;
sa.nl_family=AF_NETLINK;
sa.nl_pid=0;//getpid();
sa.nl_groups=0;
int b = bind(s,(sockaddr*)&sa,sizeof(sa));
if(b==-1){std::cout<<"bind error";return b;}; //prints 3
std::cout<<"bind "<<b; //prints 0
int si=sizeof(struct cn_msg)+sizeof(struct w1_netlink_msg)+sizeof(w1_netlink_cmd);
char * buf;
buf=(char *)malloc(1024);
memset(buf,0,1024);
cn_msg *cnh = (cn_msg*)buf;
w1_netlink_msg* wnh=(w1_netlink_msg*)&cnh->data;
w1_netlink_cmd* wcmd = (w1_netlink_cmd*)&wnh->data;
cnh->id.idx=CN_W1_IDX;
cnh->id.val=CN_W1_VAL;
cnh->seq=nl_seq;
cnh->flags=0;
wnh->type=W1_LIST_MASTERS;
wnh->len=0;
cnh->len=sizeof(struct w1_netlink_msg)+sizeof(w1_netlink_cmd);
int len=netlink_send(s,cnh);
std::cout<<"send "<<len<<" "<<(int)wnh->status; //prints 52 0
pollfd pfd;
pfd.fd=s;
pfd.events=POLLIN;
pfd.revents=0;
int p=0;
while(p<1) {
p=poll(&pfd,1,1000);
std::cout<<"poll "<<p<<pfd.revents; //prints 0 0 in infinite loop
std::cout.flush();
};
memset(wcmd,0,128);
len=recv(s,buf,255,0);
std::cout<<"recv "<<len;
close(s);
return 0;
}
Result is socket 3 bind 0 send 52 0 poll 00 poll 00 ...
Thanks
As of title.
The program will wait for the first event, and then go into an infinite loop - why doesn't it just process one event at a time?
#include <stdio.h>
#include <stdlib.h>
#include <sys/inotify.h>
#include <unistd.h>
/*
 * Watch /home/andrea/Downloads for newly created entries and print each
 * name, one batch of events per read().
 *
 * Returns EXIT_FAILURE if inotify cannot be set up or a read fails,
 * 0 if read() reports end of input.
 */
int main (int argc, char **argv)
{
  /*
   * A single event is a fixed header followed by a variable-length name, so
   * the buffer must be larger than sizeof (struct inotify_event); a too-small
   * buffer makes read() fail, and a failing read() returns -1, which is
   * "true" in a bare while condition. The buffer is aligned for the header.
   */
  char buf[4096] __attribute__ ((aligned (__alignof__ (struct inotify_event))));
  int id, wd;
  ssize_t len;

  id = inotify_init ();
  if (id == -1)
    {
      perror ("inotify_init");
      return EXIT_FAILURE;
    }

  wd = inotify_add_watch (id, "/home/andrea/Downloads", IN_CREATE);
  if (wd == -1)
    {
      perror ("inotify_add_watch");
      close (id);
      return EXIT_FAILURE;
    }

  puts ("waiting...");
  while ((len = read (id, buf, sizeof buf)) > 0)
    {
      char *p = buf;

      /* One read() may return several events back to back. */
      while (p < buf + len)
        {
          const struct inotify_event *ev = (const struct inotify_event *) p;

          if (ev->len > 0)
            printf ("created %s\n", ev->name);
          p += sizeof (struct inotify_event) + ev->len;
        }
      puts ("waiting...");
    }

  if (len == -1)
    perror ("read");
  close (id);
  return len == -1 ? EXIT_FAILURE : 0;
}
Firstly, the events reported by inotify are not exactly sizeof(struct inotify_event) bytes long, because a variable-length name follows each fixed-size header. Use ioctl with FIONREAD to get the number of bytes available for reading.
/* Ask the inotify descriptor `id` how many bytes are available for reading. */
int avail;
ioctl(id, FIONREAD, &avail);
Secondly, you used blocking I/O. If you instead use inotify_init1(O_NONBLOCK) to initialise inotify, read() will immediately return and set errno to EAGAIN if no data is available. Of course, this is optional if you first used FIONREAD to check if there is data available in the first place.
I have a unit test that checks behavior on blocking and non-blocking sockets - the server writes a long response, and at some point it should not be able to write any more and it
blocks on write.
Basically one side writes and the other side does not read.
Under Solaris, at some point I get an error "Not enough space" (after writing 75MB) instead of blocking on write:
Program that reproduces the problem:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
char const *address = "127.0.0.1";
/*
 * check(x): evaluate x once; if the result is negative, print the expression
 * text with perror() and exit with status 1.
 */
#define check(x) do { if( (x) < 0) { perror(#x) ; exit(1); } } while(0)
// Reproducer: the forked child accepts one TCP connection on 127.0.0.1:8080
// and keeps calling send() on it, while the parent connects, never reads,
// and closes the connection after five seconds.
int main()
{
// Ignore SIGPIPE so a failing send() is reported through its return value.
signal(SIGPIPE,SIG_IGN);
// Address shared by both processes: 127.0.0.1, port 8080.
struct sockaddr_in inaddr = {};
inaddr.sin_family = AF_INET;
inaddr.sin_addr.s_addr = inet_addr(address);
inaddr.sin_port = htons(8080);
int res = fork();
if(res < 0) {
perror("fork");
exit(1);
}
if(res > 0) {
// Parent: the client side. It connects and then only sleeps.
int fd = -1;
int status;
// Give the child a second to reach listen() before connecting.
sleep(1);
check(fd = socket(AF_INET,SOCK_STREAM,0));
check(connect(fd,(sockaddr*)&inaddr,sizeof(inaddr)));
// Hold the connection open without reading anything, then close it.
sleep(5);
close(fd);
// Reap the child before exiting.
wait(&status);
return 0;
}
else {
// Child: the server side. It accepts one connection and writes until stopped.
int acc,fd;
check(acc = socket(AF_INET,SOCK_STREAM,0));
int yes = 1;
check(setsockopt(acc,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)));
check(bind(acc,(sockaddr*)&inaddr,sizeof(inaddr)));
check(listen(acc,10));
check(fd = accept(acc,0,0));
// The payload is never initialised; only the byte count matters here.
char buf[1000];
// Running total of bytes accepted by send().
long long total= 0;
do {
int r = send(fd,buf,sizeof(buf),0);
if(r < 0) {
// send() failed: print the reason and stop.
printf("write %s\n",strerror(errno));
return 0;
}
else if(r==0) {
printf("Got eof\n");
return 0;
}
total += r;
// Safety stop once more than 100 MiB has been accepted.
if(total > 100*1024*1024) {
printf("Too much!!!!\n");
return 0;
}
printf("%lld\n",total);
}while(1);
}
return 0;
}
The output on Solaris (last two lines)
75768000
write Not enough space
The expected output on Linux (last two lines)
271760
write Connection reset by peer
Which happens only when the other side closes the socket.
Any ideas why and how can I fix it, what options to set?
P.S.: It is OpenSolaris 2009.06, x86
Edits
Added full C code that reproduces the problem
Answer:
This seems like a bug in a specific version of the Solaris kernel or libc library.
From OpenSolaris source code, I'm afraid the SO_SNDTIMEO option is unsupported: https://hg.java.net/hg/solaris~on-src/file/tip/usr/src/uts/common/inet/sockmods/socksctp.c#l1233
If you want to block if there's no space available, you need to write code to do that.
POSIX is pretty clear that write on a socket is equivalent to send with no options, and that send "may fail if ... [i]nsufficient resources were available in the system to perform the operation."