eBPF verifier: R1 is not a scalar - ebpf

I have this eBPF code:
/* Record written to the `connections` ring buffer: one socket address. */
struct sock_info {
struct sockaddr addr;
};
/*
 * Program attached to the syscalls/sys_enter_accept4 tracepoint.
 * Reserves one sock_info record in the `connections` ring buffer, copies
 * sizeof(struct sockaddr) bytes from upeer_sockaddr into it and submits it.
 * Always returns 0, including when the reservation fails.
 * NOTE(review): the parameter list mirrors the accept4() syscall arguments;
 * confirm that a tracepoint program is really invoked with these four
 * arguments rather than with a single context pointer.
 */
SEC("tracepoint/syscalls/sys_enter_accept4")
int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags) {
// Reserve room for exactly one record (flags = 0).
struct sock_info *iad = bpf_ringbuf_reserve(&connections, sizeof(struct sock_info), 0);
if (!iad) {
// Nothing was reserved, so there is nothing to submit.
bpf_printk("can't reserve ringbuf space");
return 0;
}
// https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
// Copy the bytes behind upeer_sockaddr into the reserved record.
bpf_probe_read(&iad->addr, sizeof(struct sockaddr), upeer_sockaddr);
bpf_ringbuf_submit(iad, 0);
return 0;
}
When I try to load it from user space, the Cilium eBPF library returns this verification error:
permission denied
R1 is not a scalar
; int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags) {
0: (bf) r6 = r2
R2 !read_ok
processed 1 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
If I remove the bpf_probe_read function, then the code runs. I tried many alternatives to try to read the contents of the *upeer_sockaddr pointer, but did not succeed.
Any hint why the eBPF verifier is complaining?
This is the output of llvm-objdump command:
llvm-objdump -S --no-show-raw-insn pkg/ebpf/bpf_bpfel.o
pkg/ebpf/bpf_bpfel.o: file format elf64-bpf
Disassembly of section tracepoint/syscalls/sys_enter_accept4:
0000000000000000 <sys_enter_accept4>:
0: r6 = r2
1: r1 = 0 ll
3: r2 = 16
4: r3 = 0
5: call 131
6: r7 = r0
7: if r7 != 0 goto +5 <LBB0_2>
8: r1 = 0 ll
10: r2 = 28
11: call 6
12: goto +7 <LBB0_3>
0000000000000068 <LBB0_2>:
13: r1 = r7
14: r2 = 16
15: r3 = r6
16: call 4
17: r1 = r7
18: r2 = 0
19: call 132
00000000000000a0 <LBB0_3>:
20: r0 = 0
21: exit

You have defined your tracepoint program with 4 arguments int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags)
But these are not the parameters with which your program will be invoked.
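The R2 !read_ok error occurs because your code reads the second parameter (upeer_sockaddr, passed in R2), which does not exist: a tracepoint program receives a single argument in R1, and the verifier does not allow reading from uninitialized registers.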
For tracepoints you can find out the context structure by looking at the sysfs:
$ cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_accept4/format
name: sys_enter_accept4
ID: 1595
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:int __syscall_nr; offset:8; size:4; signed:1;
field:int fd; offset:16; size:8; signed:0;
field:struct sockaddr * upeer_sockaddr; offset:24; size:8; signed:0;
field:int * upeer_addrlen; offset:32; size:8; signed:0;
field:int flags; offset:40; size:8; signed:0;
print fmt: "fd: 0x%08lx, upeer_sockaddr: 0x%08lx, upeer_addrlen: 0x%08lx, flags: 0x%08lx", ((unsigned long)(REC->fd)), ((unsigned long)(REC->upeer_sockaddr)), ((unsigned long)(REC->upeer_addrlen)), ((unsigned long)(REC->flags))
If we turn this into a structure we get the following:
/*
 * Context passed to a program attached to the sys_enter_accept4 tracepoint.
 * The layout follows the offsets and sizes reported by the tracepoint's
 * format file: every syscall argument occupies an 8-byte slot starting at
 * offset 16, so explicit padding is needed after the 4-byte __syscall_nr.
 */
struct accept4_args {
	u64 pad;                         /* offset 0:  the common_* fields (8 bytes) */
	u32 __syscall_nr;                /* offset 8,  size 4 */
	u32 pad2;                        /* offset 12: hole before the 8-byte argument slots */
	u64 fd;                          /* offset 16, size 8 */
	struct sockaddr *upeer_sockaddr; /* offset 24, size 8 */
	int *upeer_addrlen;              /* offset 32, size 8 */
	u64 flags;                       /* offset 40, size 8 */
};
Note that I replaced the common_ fields here with u64 pad, since that is likely not what you are interested in.
A pointer to the struct is passed in as one parameter: int sys_enter_accept4(struct accept4_args *args)

Related

string matching in bpf programs

I am writing a bpf program in which i need to match prefix of filename in openat syscall.
Since we cannot link libc, and there is no such builtin function, i wrote one myself.
#define MAX_FILE_NAME_LENGTH 128
#define LOG_DIR "/my/prefix"
#define LEN_LOG_DIR sizeof(LOG_DIR)

/*
 * Check whether `str` starts with LOG_DIR.
 *
 * Returns:
 *    0  every character of LOG_DIR was matched (its terminator was reached)
 *   -1  `str` ended before LOG_DIR did
 *   -2  a character differed
 *   -3  the loop bound ran out; LEN_LOG_DIR counts LOG_DIR's terminating
 *       NUL, so the terminator check returns 0 before this is reached
 */
int matchPrefix(char str[MAX_FILE_NAME_LENGTH]) {
	int pos = 0;

	while (pos < LEN_LOG_DIR) {
		char want = LOG_DIR[pos];
		char got;

		/* End of the prefix: everything before it matched. */
		if (want == '\0')
			return 0;

		got = str[pos];
		if (got == '\0')
			return -1;
		if (got != want)
			return -2;

		pos++;
	}
	return -3;
}
I am getting an invalid mem access 'mem_or_null' error when I try to load this program.
libbpf: load bpf program failed: Permission denied
libbpf: -- BEGIN DUMP LOG ---
libbpf:
Validating matchPrefix() func#1...
38: R1=mem_or_null(id=2,off=0,imm=0) R10=fp0
; int matchPrefix(char str[MAX_FILE_NAME_LENGTH]) {
38: (18) r0 = 0xffffffff ; R0_w=P4294967295
; char ch2 = str[i];
40: (71) r2 = *(u8 *)(r1 +0)
R1 invalid mem access 'mem_or_null'
processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
libbpf: failed to load program 'syscall_enter_open'
R1 is the register for the first argument, which is a char array on the stack. Do I need to pass the length of the array separately?
The function is called this way:
char filename[MAX_FILE_NAME_LENGTH];
bpf_probe_read_user(filename, sizeof(filename), args->filename);
if (matchPrefix(filename) != 0) {
return 0;
}
Even if I change the function signature to accept a char *, I get a different error: R1 invalid mem access 'scalar'.
Can someone help me understand why I am getting this error during function verification?
TL;DR. Making your matchPrefix function a static inline one should work around the verifier issue.
I believe this is happening because the BPF verifier recognizes your function as a global one (vs. inlined) and therefore verifies it independently. That means it won't assume anything for the arguments. Thus, the str argument is recognized as mem_or_null and verification fails because you didn't check that pointer isn't null.
Inlining the function will work around this issue because the verifier won't see a function anymore. It will be able to preserve the inferred type of filename when verifying the code that corresponds to the body of matchPrefix.
There is an easier solution using strcmp.
An implementation can be found in xdp-project/bpf-next.
The code from there is:
/*
 * Compare two NUL-terminated strings byte by byte, treating each byte as
 * unsigned char.
 *
 * Returns 0 when the strings are equal, -1 when the first differing byte of
 * `cs` is smaller than that of `ct`, and 1 otherwise.
 */
int strcmp(const char *cs, const char *ct)
{
	for (;; cs++, ct++) {
		unsigned char left = *cs;
		unsigned char right = *ct;

		if (left != right)
			return left < right ? -1 : 1;
		/* Both bytes are equal here, so one NUL means both strings ended. */
		if (left == '\0')
			return 0;
	}
}
Do let me know if you still have issue.
NOTE: you cannot use #define to define the string.
Please re-check this line:
char ch1 = LOG_DIR[i];

BPF verifier rejects the use of an inode ptr as a key

I'm attempting to implement an eBPF code where I have a BPF MAP with the key of type struct inode * and some value.
Please see the below sample code
/* Map value: a 10-byte data buffer. */
struct value {
char data[10];
};
/*
 * Hash map `info` holding up to 100 entries. The key is the size of a
 * `struct inode *` (the pointer itself, not the inode it points to) and the
 * value is a `struct value`. Created with BPF_F_NO_PREALLOC.
 */
struct bpf_map_def info SEC("maps") ={
.type = BPF_MAP_TYPE_HASH,
.max_entries = 100,
.key_size = sizeof(struct inode *),
.value_size = sizeof(struct value),
.map_flags = BPF_F_NO_PREALLOC,
};
/*
 * fexit program on vfs_unlink. Reads dentry->d_inode with BPF_CORE_READ,
 * looks the resulting pointer up in the `info` map and prints
 * "not handling" when no entry is found. Always returns 0.
 * NOTE(review): `p` itself is handed to bpf_map_lookup_elem() as the key
 * argument; confirm whether the helper expects the address of the key
 * (a pointer to memory holding `p`) instead.
 */
SEC("fexit/vfs_unlink")
int BPF_PROG(
vfs_unlink_exit,
const struct user_namespace *mnt_userns,
const struct inode *dir,
const struct dentry *dentry,
const struct inode **delegated_inode,
int ret)
{
// Inode pointer stored in the dentry being unlinked.
struct inode * p = BPF_CORE_READ(dentry,d_inode);
struct value *val = bpf_map_lookup_elem(&info, p);
if (val == NULL)
{
bpf_printk("not handling");
return 0;
}
// An entry exists; nothing further is done with it here.
return 0;
}
The verifier does not like this :
libbpf: -- BEGIN DUMP LOG ---
libbpf:
R1 type=ctx expected=fp
; int BPF_PROG(
0: (b7) r2 = 48
; int BPF_PROG(
1: (79) r3 = *(u64 *)(r1 +16)
func 'vfs_unlink' arg2 has btf_id 691 type STRUCT 'dentry'
2: (0f) r3 += r2
last_idx 2 first_idx 0
regs=4 stack=0 before 1: (79) r3 = *(u64 *)(r1 +16)
regs=4 stack=0 before 0: (b7) r2 = 48
3: (bf) r1 = r10
;
4: (07) r1 += -16
; struct inode * p = BPF_CORE_READ(dentry,d_inode);
5: (b7) r2 = 8
6: (85) call bpf_probe_read_kernel#113
last_idx 6 first_idx 0
regs=4 stack=0 before 5: (b7) r2 = 8
; struct inode * p = BPF_CORE_READ(dentry,d_inode);
7: (79) r2 = *(u64 *)(r10 -16)
; struct value *val = bpf_map_lookup_elem(&info, p);
8: (18) r1 = 0xffff93c9f43fc000
10: (85) call bpf_map_lookup_elem#1
R2 type=inv expected=fp, pkt, pkt_meta, map_key, map_value
processed 10 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
I'm also not able to pass the parameter pointer directly as the key, e.g.:
/*
 * Variant of the fexit program on vfs_unlink that uses the `dir` argument
 * directly as the lookup key for the `info` map. Prints "not handling" when
 * no entry is found. Always returns 0.
 * NOTE(review): `dir` itself is handed to bpf_map_lookup_elem() as the key
 * argument; confirm whether the helper expects the address of the key
 * instead of the inode pointer.
 */
SEC("fexit/vfs_unlink")
int BPF_PROG(
vfs_unlink_exit,
const struct user_namespace *mnt_userns,
const struct inode *dir,
const struct dentry *dentry,
const struct inode **delegated_inode,
int ret)
{
struct value *val = bpf_map_lookup_elem(&info, dir);
if (val == NULL)
{
bpf_printk("not handling");
return 0;
}
// An entry exists; nothing further is done with it here.
return 0;
}
produces:
libbpf: -- BEGIN DUMP LOG ---
libbpf:
R1 type=ctx expected=fp
; int BPF_PROG(
0: (79) r2 = *(u64 *)(r1 +8)
func 'vfs_unlink' arg1 has btf_id 694 type STRUCT 'inode'
; struct value *val = bpf_map_lookup_elem(&info, dir);
1: (18) r1 = 0xffff93caed64c400
3: (85) call bpf_map_lookup_elem#1
R2 type=ptr_ expected=fp, pkt, pkt_meta, map_key, map_value
processed 3 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
I don't understand why, as similar approach with eg struct socket * works fine.
Any help would be greatly appreciated.
Kernel v 5.13.0-051300-generic
R2 type=inv expected=fp, pkt, pkt_meta, map_key, map_value
The verifier tells you that the second argument (in R2) to your call to bpf_map_lookup_elem() is not of the expected type. It can point to the stack (fp), to packet data (pkt) or metadata (pkt_meta), or to another map key or value (map_key, map_value).
In your case, BPF_CORE_READ() returns a scalar (inv), and dir is a pointer to a BTF-typed kernel object (ptr_); neither is suitable. You could try copying the data for your key to the eBPF stack first (assign it to a temporary variable, and pass a reference to that variable to the helper). You probably don't want to use the full struct inode as a key anyway.

When accessing the GPIO register address of a Raspberry Pi, why is the result different between unsigned int* and char*?

Using mmap(), I am going to write a value to the GPIO register address of the Raspberry Pi.
I thought the register values would be the same whether I read the mapped GPIO address through an unsigned int * or a char *, but they were not. I compared the results for both cases.
This is my code.
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
/* Offset into /dev/mem at which the GPIO registers are mapped. */
#define GPIO_BASE 0x3F200000
/* Register offsets in bytes; divide by 4 to index 32-bit words. */
#define GPFSEL1 0x04
#define GPSET0 0x1C
#define GPCLR0 0x28

/*
 * Map one 4096-byte page of /dev/mem at GPIO_BASE and dump its first 16
 * bytes, one byte per line, together with the address of each byte.
 *
 * Exits with status 1 if /dev/mem cannot be opened and with -1 if the
 * mapping fails; returns 0 otherwise.
 */
int main(void)
{
	int fd = open("/dev/mem", O_RDWR|O_SYNC);
	// Error Handling
	if (fd < 0) {
		printf("Can't open /dev/mem \n");
		exit(1);
	}
	// Map pages of memory
	char *gpio_memory_map = (char*)mmap(0, 4096, PROT_READ|PROT_WRITE,
	                                    MAP_SHARED, fd, GPIO_BASE);
	// Error Handling
	if (gpio_memory_map == MAP_FAILED) {
		printf("Error : mmap \n");
		exit(-1);
	}
	// GPIO18
	//volatile unsigned int *gpio = (volatile unsigned int*)gpio_memory_map;
	//gpio[GPFSEL1/4] = (1<<24);
	volatile char *gpio = (volatile char *)gpio_memory_map;
	int i;
	for (i = 0; i < 16; i++) {
		// A pointer must be printed with %p (and passed as void *);
		// %x with a pointer argument is undefined behaviour.
		printf("gpio[%d](%p) = %#0x\n", i, (void *)&gpio[i],
		       (unsigned int)gpio[i]);
	}
	/*
	for (i = 0; i < 5; i++) {
	gpio[GPCLR0 / 4] = (1 << 18);
	sleep(1);
	gpio[GPSET0 / 4] = (1 << 18);
	sleep(1);
	}
	*/
	// Unmap pages of memory
	munmap(gpio_memory_map, 4096);
	return 0;
}
And those below are the results.
volatile unsigned int *gpio = (volatile unsigned int *)gpio_memory_map;
gpio[0](0x76f12000) = 0x1
gpio[1](0x76f12004) = 0x1000000
gpio[2](0x76f12008) = 0
gpio[3](0x76f1200c) = 0x3fffffc0
gpio[4](0x76f12010) = 0x24000924
gpio[5](0x76f12014) = 0x924
gpio[6](0x76f12018) = 0
gpio[7](0x76f1201c) = 0x6770696f
gpio[8](0x76f12020) = 0x6770696f
gpio[9](0x76f12024) = 0x6770696f
gpio[10](0x76f12028) = 0x6770696f
gpio[11](0x76f1202c) = 0x6770696f
gpio[12](0x76f12030) = 0x6770696f
gpio[13](0x76f12034) = 0x2ffbbfff
gpio[14](0x76f12038) = 0x3ef4ff
gpio[15](0x76f1203c) = 0
volatile char *gpio = (volatile char *)gpio_memory_map;
As in result #1 above, I thought gpio[1], gpio[2] and gpio[3] should be 0, but they were not. And even if I try to write a new value to gpio[1], gpio[2] or gpio[3], it stays the same. Why are the results different when accessing the registers through char * and unsigned int *?
gpio[0](0x76f47000) = 0x1
gpio[1](0x76f47001) = 0x69
gpio[2](0x76f47002) = 0x70
gpio[3](0x76f47003) = 0x67
gpio[4](0x76f47004) = 0
gpio[5](0x76f47005) = 0x69
gpio[6](0x76f47006) = 0x70
gpio[7](0x76f47007) = 0x67
gpio[8](0x76f47008) = 0
gpio[9](0x76f47009) = 0x69
gpio[10](0x76f4700a) = 0x70
gpio[11](0x76f4700b) = 0x67
gpio[12](0x76f4700c) = 0xc0
gpio[13](0x76f4700d) = 0x69
gpio[14](0x76f4700e) = 0x70
gpio[15](0x76f4700f) = 0x67

Network packet counting: Failure to read packet data from a BPF socket filter

I'd like to count incoming network packets and their length in bytes for each TOS value. I created two maps with 256 entries each: the first contains the packet count for each TOS value and the second the byte count. So I've written the following eBPF socket filter:
/*
 * Socket-filter program in raw instruction form.
 * Saves the initial R1 in R6, accepts only packets whose first byte has the
 * high nibble 0x4, then stores the byte at offsetof(struct iphdr, tos) at
 * fp-4 and uses it as the key for two map lookups: the entry in map_cnt_fd
 * is atomically incremented by 1, and the entry in map_bytes_fd gets an
 * atomic add of the halfword loaded through R6. Every path returns 0.
 * Jump offsets are relative, so instructions must not be added or removed
 * without adjusting them.
 */
struct bpf_insn prog[]{
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
//we use dgram socket, so packet starts directly from IP header
// BPF_LD_ABS(BPF_H, offsetof(struct ethhdr, h_proto)), // r0 = header type
// BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, ETH_P_IP, 2), // if (r0 == IPv4) skip 2
// BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0
// BPF_EXIT_INSN(), // return
//check for IP version, we only interested in v4
BPF_LD_ABS(BPF_B, 0), // R0 = ip->vers: offsetof(struct iphdr, version)
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xF0), // r0 = r0 & 0xF0
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x40, 2), // if (r0 == 0x40) goto pc+2
BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0
BPF_EXIT_INSN(), // return
// load packet TOS value
BPF_LD_ABS(BPF_B, offsetof(struct iphdr, tos)), // R0 = ip->tos
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), // *(u32 *)(fp - 4) = r0
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // r2 = fp
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // r2 = fp - 4
//first map with packet counters
BPF_LD_MAP_FD(BPF_REG_1, map_cnt_fd), // r1 = map_fd
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem), // r0 = map_lookup(r1, r2)
// no entry for this key: skip the increment and go on to the byte counter
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // if (r0 == 0) goto pc+2
BPF_MOV64_IMM(BPF_REG_1, 1), // r1 = 1
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW,
BPF_REG_0, BPF_REG_1, 0, 0), // xadd r0 += r1
// rebuild the key at fp-4 for the second lookup
BPF_LD_ABS(BPF_B, offsetof(struct iphdr, tos)), // R0 = ip->tos
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), // *(u32 *)(fp - 4) = r0
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // r2 = fp - 4
//second map with packet bytes
BPF_LD_MAP_FD(BPF_REG_1, map_bytes_fd), // r1 = map_fd
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem), // r0 = map_lookup(r1, r2)
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // if (r0 == 0) goto pc+2
// FIXME big endian
// NOTE(review): R6 holds the program's initial R1; confirm that an offset
// of offsetof(struct iphdr, tot_len) from it actually addresses packet data.
BPF_LDX_MEM(BPF_H, BPF_REG_1, BPF_REG_6,
offsetof(struct iphdr, tot_len)), // r1 = tot_len
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW,
BPF_REG_0, BPF_REG_1, 0, 0), // xadd r0 += r1
BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0
BPF_EXIT_INSN(),
};
The maps are created without errors, the socket filter program loads fine too, and the packet counter part works as it should. But the bytes counter is always 0. What is the problem with this code?
I tried to write a simple example. To compile you just need to include bpf_insn.h.
Problem: Reading from the Socket Buffer
The context placed in BPF_REG_1 before the program starts is not a pointer to the beginning of the data. Instead, it is a pointer to a struct __sk_buff defined in the UAPI headers as follows:
struct __sk_buff {
__u32 len;
...
}
So when you attempt to read data from your IP header:
BPF_LDX_MEM(BPF_H, BPF_REG_1, BPF_REG_6, offsetof(struct iphdr, tot_len)),
You are in fact reading two bytes at offset 2 from the struct __sk_buff (let's call its pointer skb). Because your system is in little endian, this corresponds to the most significant bits for skb->len, which are 0 unless you have packets bigger than 2^16 bytes (unlikely).
We have two possible solutions here.
Solution 1: Use Absolute Load
We can update your program to read the IP length at the correct location. I believe this is not possible with a BPF_LDX_MEM(), because socket filters do not permit direct packet access. The workaround would be to use an absolute load instead. Your program would become:
/*
 * Abridged variant of the socket-filter program: the packet-count part and
 * the ToS parsing are omitted. Loads the halfword at
 * offsetof(struct iphdr, tot_len) with an absolute load, keeps it in R7
 * across the map lookup, and atomically adds it to the entry found in
 * map_bytes_fd. Returns 0.
 */
struct bpf_insn prog[]{
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
// ... packet number counter, skipped for brevity
// Read IP length and store to r7 (preserved during helper calls)
BPF_LD_ABS(BPF_H,
offsetof(struct iphdr, tot_len)), // r0 = tot_len
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), // r7 = r0
// No need to parse ToS a second time here, skipped
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // r2 = fp - 4
//second map with packet bytes
BPF_LD_MAP_FD(BPF_REG_1, map_bytes_fd), // r1 = map_fd
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem), // r0 = map_lookup(r1, r2)
// when taken, this skips both the xadd and the mov below; r0 is already 0
// on that path, so the program still exits with 0
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // if (r0 == 0) goto pc+2
// Now add length to the counter
BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_7, 0), // xadd r0 += r7
BPF_MOV64_IMM(BPF_REG_0, 0), // r0 = 0
BPF_EXIT_INSN(),
};
Solution 2: Just use skb->len
The other solution is to get the length from skb, since the kernel has already computed it for us. This is just a matter of fixing the offset and size of the load you had, so your BPF_LDX_MEM() and XADD instructions would become:
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
offsetof(struct __sk_buff, len)), // r1 = skb->len
BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), // xadd r0 += r1

Why can typedef not be used locally in SystemVerilog?

Why can typedef not be used locally in SystemVerilog?
I am referencing from http://www.asic-world.com/systemverilog/data_types7.html
I have a problem when I use struct instead of typedef struct. Why does it not work?
For your first question "why typedef cannot be used locally?"
Typedef can be used inside any SystemVerilog module and can be accessed/initialized based on our needs. Refer Section 6.18 User-defined types of SV LRM IEEE 1800 - 2012
Here is one example which uses typedef inside the module
// Struct variable declared without typedef, outside any module.
struct { //using struct without typedef
byte a;
reg b;
shortint unsigned c;
} myStruct;
// Demo module: declares an anonymous local struct and a typedef'd struct,
// assigns values to them and to myStruct, and prints their members.
module struct_data ();
// Local anonymous struct, initialised positionally: a=11, b=1, c=101.
struct {
byte a;
reg b;
shortint unsigned c;
} myLocalStruct = '{11,1,101};
// Named struct type declared inside the module.
typedef struct { //using typedef inside the module
real r0, r1;
int i0, i1;
logic [ 7:0] opcode;
logic [23:0] address;
} instruction_word_t;
instruction_word_t IW;
// Members of type real get 1.0; every other member gets 0.
assign IW = '{ real:1.0,default:0};
// Positional assignment: a=10, b=0, c=100.
assign myStruct = '{10,0,100};
initial begin
// Wait one time unit before printing.
#1;
$display ("a = %b b = %b c = %h", myStruct.a, myStruct.b, myStruct.c);
$display ("a = %b b = %b c = %h", myLocalStruct.a, myLocalStruct.b, myLocalStruct.c);
$display ("r0 = %b r1 = %d opcode = %h ,address = %h ",IW.r0, IW.r1, IW.opcode,IW.address);
// End the simulation one time unit later.
#1 $finish;
end
endmodule
For your second question, we can also use struct without using typedef and I have shown in the example above.
output of the above code
a = 00001010 b = 0 c = 0064
a = 00001011 b = 1 c = 0065
r0 = 00000000000000000000000000000001 r1 = 1 opcode = 00 ,address = 000000