Network packet counting: failure to read packet data from a BPF socket filter - eBPF

I'd like to count incoming network packets and their length in bytes for each TOS value. I created two maps, each with 256 entries: the first contains the packet count for each TOS value, the second the byte count. So I've written the following eBPF socket filter:
struct bpf_insn prog[] = {
    BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
    // we use a dgram socket, so the packet starts directly at the IP header
    // BPF_LD_ABS(BPF_H, offsetof(struct ethhdr, h_proto)), // r0 = header type
    // BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, ETH_P_IP, 2),        // if (r0 == IPv4) skip 2
    // BPF_MOV64_IMM(BPF_REG_0, 0),                         // r0 = 0
    // BPF_EXIT_INSN(),                                     // return
    // check the IP version, we are only interested in v4
    BPF_LD_ABS(BPF_B, 0),                            // r0 = ip->vers: offsetof(struct iphdr, version)
    BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xF0),         // r0 = r0 & 0xF0
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x40, 2),        // if (r0 == 0x40) goto pc+2
    BPF_MOV64_IMM(BPF_REG_0, 0),                     // r0 = 0
    BPF_EXIT_INSN(),                                 // return
    // load the packet's TOS value
    BPF_LD_ABS(BPF_B, offsetof(struct iphdr, tos)),  // r0 = ip->tos
    BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),   // *(u32 *)(fp - 4) = r0
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),            // r2 = fp
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),           // r2 = fp - 4
    // first map, with packet counters
    BPF_LD_MAP_FD(BPF_REG_1, map_cnt_fd),            // r1 = map_fd
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                 BPF_FUNC_map_lookup_elem),          // r0 = map_lookup(r1, r2)
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),           // if (r0 == 0) goto pc+2
    BPF_MOV64_IMM(BPF_REG_1, 1),                     // r1 = 1
    BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW,
                 BPF_REG_0, BPF_REG_1, 0, 0),        // xadd r0 += r1
    BPF_LD_ABS(BPF_B, offsetof(struct iphdr, tos)),  // r0 = ip->tos
    BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),   // *(u32 *)(fp - 4) = r0
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),            // r2 = fp
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),           // r2 = fp - 4
    // second map, with packet bytes
    BPF_LD_MAP_FD(BPF_REG_1, map_bytes_fd),          // r1 = map_fd
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                 BPF_FUNC_map_lookup_elem),          // r0 = map_lookup(r1, r2)
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),           // if (r0 == 0) goto pc+2
    // FIXME big endian
    BPF_LDX_MEM(BPF_H, BPF_REG_1, BPF_REG_6,
                offsetof(struct iphdr, tot_len)),    // r1 = tot_len
    BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW,
                 BPF_REG_0, BPF_REG_1, 0, 0),        // xadd r0 += r1
    BPF_MOV64_IMM(BPF_REG_0, 0),                     // r0 = 0
    BPF_EXIT_INSN(),
};
The maps are created without errors, the socket filter program loads fine too, and the packet-counter part works as it should. But the byte counter is always 0. What is the problem with this code?
I tried to write a simple example; to compile it you just need to include bpf_insn.h.

Problem: Reading from the Socket Buffer
The context placed in BPF_REG_1 before the program starts is not a pointer to the beginning of the data. Instead, it is a pointer to a struct __sk_buff defined in the UAPI headers as follows:
struct __sk_buff {
    __u32 len;
    ...
}
So when you attempt to read data from your IP header:
BPF_LDX_MEM(BPF_H, BPF_REG_1, BPF_REG_6, offsetof(struct iphdr, tot_len)),
You are in fact reading two bytes at offset 2 from the struct __sk_buff (let's call its pointer skb). Because your system is little-endian, this corresponds to the most significant 16 bits of skb->len, which are 0 unless you have packets bigger than 2^16 bytes (unlikely).
We have two possible solutions here.
Solution 1: Use Absolute Load
We can update your program to read the IP length at the correct location. I believe this is not possible with BPF_LDX_MEM(), because socket filters do not permit direct packet access; the workaround is to use an absolute load instead. As a side benefit, BPF_LD_ABS() converts the loaded halfword from network to host byte order, which also takes care of your FIXME about endianness. Your program would become:
struct bpf_insn prog[] = {
    BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
    // ... packet number counter, skipped for brevity
    // Read IP length and store it to r7 (preserved across helper calls)
    BPF_LD_ABS(BPF_H,
               offsetof(struct iphdr, tot_len)),     // r0 = tot_len
    BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),             // r7 = r0
    // No need to parse ToS a second time here, skipped
    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),            // r2 = fp
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),           // r2 = fp - 4
    // second map, with packet bytes
    BPF_LD_MAP_FD(BPF_REG_1, map_bytes_fd),          // r1 = map_fd
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                 BPF_FUNC_map_lookup_elem),          // r0 = map_lookup(r1, r2)
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),           // if (r0 == 0) goto pc+2
    // Now add the length to the counter
    BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_7, 0),   // xadd r0 += r7
    BPF_MOV64_IMM(BPF_REG_0, 0),                     // r0 = 0
    BPF_EXIT_INSN(),
};
Solution 2: Just use skb->len
The other solution is to get the length from skb, since the kernel has already computed it for us. This is just a matter of fixing the offset and size of the load you had; your BPF_LDX_MEM() and xadd instructions would become:
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
            offsetof(struct __sk_buff, len)),    // r1 = skb->len
BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),   // xadd r0 += r1
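For completeness, here is a sketch of how the per-TOS byte counters could then be read back from user space with libbpf's low-level map API (the function dump_bytes_per_tos is my name for it; it assumes the map was created with 4-byte keys and 8-byte values, as the BPF_W store and BPF_DW xadd above imply):

#include <bpf/bpf.h>
#include <stdio.h>

// Iterate over all 256 possible TOS values and print the non-zero byte counters.
void dump_bytes_per_tos(int map_bytes_fd)
{
    __u64 bytes;
    for (__u32 tos = 0; tos < 256; tos++) {
        if (bpf_map_lookup_elem(map_bytes_fd, &tos, &bytes) == 0 && bytes)
            printf("TOS 0x%02x: %llu bytes\n", tos, (unsigned long long)bytes);
    }
}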

Related

eBPF verifier: R1 is not a scalar

I have this eBPF code:
struct sock_info {
    struct sockaddr addr;
};

SEC("tracepoint/syscalls/sys_enter_accept4")
int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags) {
    struct sock_info *iad = bpf_ringbuf_reserve(&connections, sizeof(struct sock_info), 0);
    if (!iad) {
        bpf_printk("can't reserve ringbuf space");
        return 0;
    }
    // https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
    bpf_probe_read(&iad->addr, sizeof(struct sockaddr), upeer_sockaddr);
    bpf_ringbuf_submit(iad, 0);
    return 0;
}
When I try to load it from user space, the Cilium eBPF library returns this verification error:
permission denied
R1 is not a scalar
; int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags) {
0: (bf) r6 = r2
R2 !read_ok
processed 1 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
If I remove the bpf_probe_read call, then the code loads. I tried many alternatives to read the contents of the *upeer_sockaddr pointer, but did not succeed.
Any hint why the eBPF verifier is complaining?
This is the output of llvm-objdump command:
llvm-objdump -S --no-show-raw-insn pkg/ebpf/bpf_bpfel.o
pkg/ebpf/bpf_bpfel.o: file format elf64-bpf
Disassembly of section tracepoint/syscalls/sys_enter_accept4:
0000000000000000 <sys_enter_accept4>:
0: r6 = r2
1: r1 = 0 ll
3: r2 = 16
4: r3 = 0
5: call 131
6: r7 = r0
7: if r7 != 0 goto +5 <LBB0_2>
8: r1 = 0 ll
10: r2 = 28
11: call 6
12: goto +7 <LBB0_3>
0000000000000068 <LBB0_2>:
13: r1 = r7
14: r2 = 16
15: r3 = r6
16: call 4
17: r1 = r7
18: r2 = 0
19: call 132
00000000000000a0 <LBB0_3>:
20: r0 = 0
21: exit
You have defined your tracepoint program with 4 arguments: int sys_enter_accept4(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen, int flags).
But these are not the parameters with which your program will be invoked.
The R2 !read_ok error is caused by the access to a second, non-existent parameter: you are not allowed to read from uninitialized registers.
For tracepoints you can find out the context structure by looking at its format file in tracefs:
$ cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_accept4/format
name: sys_enter_accept4
ID: 1595
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;
field:int __syscall_nr; offset:8; size:4; signed:1;
field:int fd; offset:16; size:8; signed:0;
field:struct sockaddr * upeer_sockaddr; offset:24; size:8; signed:0;
field:int * upeer_addrlen; offset:32; size:8; signed:0;
field:int flags; offset:40; size:8; signed:0;
print fmt: "fd: 0x%08lx, upeer_sockaddr: 0x%08lx, upeer_addrlen: 0x%08lx, flags: 0x%08lx", ((unsigned long)(REC->fd)), ((unsigned long)(REC->upeer_sockaddr)), ((unsigned long)(REC->upeer_addrlen)), ((unsigned long)(REC->flags))
If we turn this into a structure we get the following:
struct accept4_args {
    u64 pad;                         // common_ fields, offsets 0-7
    u32 __syscall_nr;                // offset 8, size 4
    u32 pad2;                        // padding, so that fd starts at offset 16 as in the format above
    u64 fd;                          // offset 16, size 8
    struct sockaddr *upeer_sockaddr; // offset 24
    int *upeer_addrlen;              // offset 32
    u64 flags;                       // offset 40, size 8
};
Note that I replaced the common_ fields here with u64 pad, since they are likely not what you are interested in.
A pointer to this struct is passed in as the single parameter of your program: int sys_enter_accept4(struct accept4_args *args).
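Putting it together, a minimal sketch of the corrected program, reusing the connections ring buffer and struct sock_info from the question and assuming a kernel recent enough to have bpf_probe_read_user():

SEC("tracepoint/syscalls/sys_enter_accept4")
int sys_enter_accept4(struct accept4_args *args) {
    struct sock_info *iad = bpf_ringbuf_reserve(&connections, sizeof(struct sock_info), 0);
    if (!iad) {
        bpf_printk("can't reserve ringbuf space");
        return 0;
    }
    // upeer_sockaddr points into user memory, so the user-memory flavor
    // of the probe-read helpers is the appropriate one here.
    bpf_probe_read_user(&iad->addr, sizeof(struct sockaddr), args->upeer_sockaddr);
    bpf_ringbuf_submit(iad, 0);
    return 0;
}

One caveat: at sys_enter time the buffer behind upeer_sockaddr has not been filled in by the kernel yet, since it is an output parameter of accept4().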

What causes the iteration of an array to conclude prematurely in Metal?

What I'm trying to do
I'm testing out Metal's capability to work with loops. Since I can't define new constants in Metal, I'm passing a uint into a buffer and using it to iterate over an array filled with integers. It looks like this in Swift:
let array1: [Int] = [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]
The problem(s)
However, when reading the result array buffer in Swift after completing the loop in Metal, it seems like not every element has been written.
#include <metal_stdlib>
using namespace metal;

kernel void shader(constant int *arr [[ buffer(0) ]],
                   device int *resultArray [[ buffer(1) ]],
                   constant uint &iter [[ buffer(2) ]]) // value of 12
{
    for (uint i = 0; i < iter; i++){
        resultArray[i] = arr[i];
    }
}
out
1
2
3
4
5
6
0
0
0
0
0
0
Similarly, using the iterator to set each element of resultArray yields strange results:
for (uint i = 0; i < iter; i++){
    resultArray[i] = i;
}
out
4294967296
12884901890
21474836484
30064771078
38654705672
47244640266
0
0
0
0
0
0
Multiplication seems to work
for (uint i = 0; i < iter; i++){
    resultArray[i] = arr[i] * i;
}
out
0
4
12
24
40
60
0
0
0
0
0
0
Addition does not
for (uint i = 0; i < iter; i++){
    resultArray[i] = arr[i] + i;
}
out
4294967297
12884901892
21474836487
30064771082
38654705677
47244640272
0
0
0
0
0
0
However, when I set iter to a value of, for example, 24 or higher, it at least iterates over the whole array of size 12.
for (uint i = 0; i < iter; i++){ // iter now value of 100
    resultArray[i] = arr[i] * iter;
}
100
200
300
400
500
600
100
200
300
400
500
600
What is going on here?
MCVE
Yes, it's a lot of code to get a simple loop running in Metal, please bear with me.
main.swift
import MetalKit

let array1: [Int] = [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]

func gpuProcess(arr1: [Int]) {
    let size = arr1.count // value of 12
    // GPU we want to use
    let device = MTLCreateSystemDefaultDevice()
    // Fifo queue for sending commands to the gpu
    let commandQueue = device?.makeCommandQueue()
    // The library for getting our metal functions
    let gpuFunctionLibrary = device?.makeDefaultLibrary()
    // Grab gpu function
    let additionGPUFunction = gpuFunctionLibrary?.makeFunction(name: "shader")
    var additionComputePipelineState: MTLComputePipelineState!
    do {
        additionComputePipelineState = try device?.makeComputePipelineState(function: additionGPUFunction!)
    } catch {
        print(error)
    }
    // Create buffers to be sent to the gpu from our array
    let arr1Buff = device?.makeBuffer(bytes: arr1,
                                      length: MemoryLayout<Int>.size * size,
                                      options: .storageModeShared)
    let resultBuff = device?.makeBuffer(length: MemoryLayout<Int>.size * size,
                                        options: .storageModeShared)
    // Create the buffer to be sent to the command queue
    let commandBuffer = commandQueue?.makeCommandBuffer()
    // Create an encoder to set values on the compute function
    let commandEncoder = commandBuffer?.makeComputeCommandEncoder()
    commandEncoder?.setComputePipelineState(additionComputePipelineState)
    // Set the parameters of our gpu function
    commandEncoder?.setBuffer(arr1Buff, offset: 0, index: 0)
    commandEncoder?.setBuffer(resultBuff, offset: 0, index: 1)
    // Set parameters for our iterator
    var count = size
    commandEncoder?.setBytes(&count, length: MemoryLayout.size(ofValue: count), index: 2)
    // Figure out how many threads we need to use for our operation
    let threadsPerGrid = MTLSize(width: 1, height: 1, depth: 1)
    let maxThreadsPerThreadgroup = additionComputePipelineState.maxTotalThreadsPerThreadgroup // 1024
    let threadsPerThreadgroup = MTLSize(width: maxThreadsPerThreadgroup, height: 1, depth: 1)
    commandEncoder?.dispatchThreads(threadsPerGrid,
                                    threadsPerThreadgroup: threadsPerThreadgroup)
    // Tell encoder that it is done encoding. Now we can send this off to the gpu.
    commandEncoder?.endEncoding()
    // Push this command to the command queue for processing
    commandBuffer?.commit()
    // Wait until the gpu function completes before working with any of the data
    commandBuffer?.waitUntilCompleted()
    // Get the pointer to the beginning of our data
    var resultBufferPointer = resultBuff?.contents().bindMemory(to: Int.self,
                                                                capacity: MemoryLayout<Int>.size * size)
    // Print out all of our new added together array information
    for _ in 0..<size {
        print("\(Int(resultBufferPointer!.pointee) as Any)")
        resultBufferPointer = resultBufferPointer?.advanced(by: 1)
    }
}

// Call function
gpuProcess(arr1: array1)
compute.metal
#include <metal_stdlib>
using namespace metal;

kernel void shader(constant int *arr [[ buffer(0) ]],
                   device int *resultArray [[ buffer(1) ]],
                   constant uint &iter [[ buffer(2) ]]) // value of 12
{
    for (uint i = 0; i < iter; i++){
        resultArray[i] = arr[i] * iter;
    }
}
You are using 64-bit Int in Swift and 32-bit int in MSL. Your GPU threads are also overlapping their work. Instead, use Int32 in Swift and make each thread process its own piece of data, like this:
import MetalKit

let array1: [Int32] = [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]

func gpuProcess(arr1: [Int32]) {
    let size = arr1.count // value of 12
    // GPU we want to use
    let device = MTLCreateSystemDefaultDevice()
    // Fifo queue for sending commands to the gpu
    let commandQueue = device?.makeCommandQueue()
    // The library for getting our metal functions
    let gpuFunctionLibrary = device?.makeDefaultLibrary()
    // Grab gpu function
    let additionGPUFunction = gpuFunctionLibrary?.makeFunction(name: "shader")
    var additionComputePipelineState: MTLComputePipelineState!
    do {
        additionComputePipelineState = try device?.makeComputePipelineState(function: additionGPUFunction!)
    } catch {
        print(error)
    }
    // Create buffers to be sent to the gpu from our array
    let arr1Buff = device?.makeBuffer(bytes: arr1,
                                      length: MemoryLayout<Int32>.stride * size,
                                      options: .storageModeShared)
    let resultBuff = device?.makeBuffer(length: MemoryLayout<Int32>.stride * size,
                                        options: .storageModeShared)
    // Create the buffer to be sent to the command queue
    let commandBuffer = commandQueue?.makeCommandBuffer()
    // Create an encoder to set values on the compute function
    let commandEncoder = commandBuffer?.makeComputeCommandEncoder()
    commandEncoder?.setComputePipelineState(additionComputePipelineState)
    // Set the parameters of our gpu function
    commandEncoder?.setBuffer(arr1Buff, offset: 0, index: 0)
    commandEncoder?.setBuffer(resultBuff, offset: 0, index: 1)
    // Set parameters for our iterator
    var count = size
    commandEncoder?.setBytes(&count, length: MemoryLayout.size(ofValue: count), index: 2)
    // Figure out how many threads we need to use for our operation
    let threadsPerGrid = MTLSize(width: 1, height: 1, depth: 1)
    let maxThreadsPerThreadgroup = additionComputePipelineState.maxTotalThreadsPerThreadgroup // 1024
    let threadsPerThreadgroup = MTLSize(width: maxThreadsPerThreadgroup, height: 1, depth: 1)
    commandEncoder?.dispatchThreads(threadsPerGrid,
                                    threadsPerThreadgroup: threadsPerThreadgroup)
    // Tell encoder that it is done encoding. Now we can send this off to the gpu.
    commandEncoder?.endEncoding()
    // Push this command to the command queue for processing
    commandBuffer?.commit()
    // Wait until the gpu function completes before working with any of the data
    commandBuffer?.waitUntilCompleted()
    // Get the pointer to the beginning of our data
    var resultBufferPointer = resultBuff?.contents().bindMemory(to: Int32.self,
                                                                capacity: MemoryLayout<Int32>.stride * size)
    // Print out all of our new added together array information
    for _ in 0..<size {
        print("\(Int32(resultBufferPointer!.pointee) as Any)")
        resultBufferPointer = resultBufferPointer?.advanced(by: 1)
    }
}

// Call function
gpuProcess(arr1: array1)
Kernel:
#include <metal_stdlib>
using namespace metal;

kernel void shader(constant int *arr [[ buffer(0) ]],
                   device int *resultArray [[ buffer(1) ]],
                   constant uint &iter [[ buffer(2) ]],
                   uint gid [[ thread_position_in_grid ]]) // thread index in the grid; height and depth of the dispatch are 1 in the CPU code, so a 1D index works here
{
    // Early out if gid is out of array bounds
    if (gid >= iter)
    {
        return;
    }
    // Each thread processes its own piece of data
    resultArray[gid] = arr[gid] * iter;
}
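As a side note on where the strange numbers in the question came from: with 64-bit Int on the Swift side and 32-bit int on the Metal side, every value Swift printed was actually two consecutive 32-bit results glued together. A small sketch of that arithmetic in C (the sketch and its names are mine; it assumes the usual little-endian layout):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void) {
    // For "resultArray[i] = arr[i] + i", Metal saw arr as 32-bit ints, so
    // arr[0] = 1 (the low half of Swift's Int 1) and arr[1] = 0 (its high half):
    //   r[0] = arr[0] + 0 = 1
    //   r[1] = arr[1] + 1 = 1
    int32_t r[2] = { 1, 1 };
    int64_t asSwiftInt;
    memcpy(&asSwiftInt, r, sizeof asSwiftInt);  // Swift reads the same 8 bytes as one Int
    printf("%lld\n", (long long)asSwiftInt);    // 4294967297, the first value in the question
    return 0;
}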
For more information on how to use Metal for compute, refer to the developer docs, and for information about attributes such as thread_position_in_grid, refer to the Metal Shading Language specification.

BPF verifier rejects the use of an inode ptr as a key

I'm attempting to implement eBPF code with a BPF map whose key is of type struct inode * and some value.
Please see the sample code below:
struct value {
    char data[10];
};

struct bpf_map_def info SEC("maps") = {
    .type = BPF_MAP_TYPE_HASH,
    .max_entries = 100,
    .key_size = sizeof(struct inode *),
    .value_size = sizeof(struct value),
    .map_flags = BPF_F_NO_PREALLOC,
};

SEC("fexit/vfs_unlink")
int BPF_PROG(
    vfs_unlink_exit,
    const struct user_namespace *mnt_userns,
    const struct inode *dir,
    const struct dentry *dentry,
    const struct inode **delegated_inode,
    int ret)
{
    struct inode *p = BPF_CORE_READ(dentry, d_inode);
    struct value *val = bpf_map_lookup_elem(&info, p);
    if (val == NULL)
    {
        bpf_printk("not handling");
        return 0;
    }
    return 0;
}
The verifier does not like this:
libbpf: -- BEGIN DUMP LOG ---
libbpf:
R1 type=ctx expected=fp
; int BPF_PROG(
0: (b7) r2 = 48
; int BPF_PROG(
1: (79) r3 = *(u64 *)(r1 +16)
func 'vfs_unlink' arg2 has btf_id 691 type STRUCT 'dentry'
2: (0f) r3 += r2
last_idx 2 first_idx 0
regs=4 stack=0 before 1: (79) r3 = *(u64 *)(r1 +16)
regs=4 stack=0 before 0: (b7) r2 = 48
3: (bf) r1 = r10
;
4: (07) r1 += -16
; struct inode * p = BPF_CORE_READ(dentry,d_inode);
5: (b7) r2 = 8
6: (85) call bpf_probe_read_kernel#113
last_idx 6 first_idx 0
regs=4 stack=0 before 5: (b7) r2 = 8
; struct inode * p = BPF_CORE_READ(dentry,d_inode);
7: (79) r2 = *(u64 *)(r10 -16)
; struct value *val = bpf_map_lookup_elem(&info, p);
8: (18) r1 = 0xffff93c9f43fc000
10: (85) call bpf_map_lookup_elem#1
R2 type=inv expected=fp, pkt, pkt_meta, map_key, map_value
processed 10 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
I'm also not able to pass the parameter pointer directly as the key, e.g.:
SEC("fexit/vfs_unlink")
int BPF_PROG(
vfs_unlink_exit,
const struct user_namespace *mnt_userns,
const struct inode *dir,
const struct dentry *dentry,
const struct inode **delegated_inode,
int ret)
{
struct value *val = bpf_map_lookup_elem(&info, dir);
if (val == NULL)
{
bpf_printk("not handling");
return 0;
}
return 0;
}
produces:
libbpf: -- BEGIN DUMP LOG ---
libbpf:
R1 type=ctx expected=fp
; int BPF_PROG(
0: (79) r2 = *(u64 *)(r1 +8)
func 'vfs_unlink' arg1 has btf_id 694 type STRUCT 'inode'
; struct value *val = bpf_map_lookup_elem(&info, dir);
1: (18) r1 = 0xffff93caed64c400
3: (85) call bpf_map_lookup_elem#1
R2 type=ptr_ expected=fp, pkt, pkt_meta, map_key, map_value
processed 3 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
I don't understand why, as a similar approach with e.g. struct socket * works fine.
Any help would be greatly appreciated.
Kernel v5.13.0-051300-generic
R2 type=inv expected=fp, pkt, pkt_meta, map_key, map_value
The verifier tells you that the second argument (in R2) to your call to bpf_map_lookup_elem() is not of the expected type. It can point to the stack (fp), to packet data (pkt) or metadata (pkt_meta), or to another map key or value (map_key, map_value).
In your case, BPF_CORE_READ() returns a scalar value (inv), and dir is a pointer to a BTF object (ptr_); neither is suitable. You could try copying the data for your key to the eBPF stack first: assign it to a temporary variable, and pass a reference to that variable to the helper. You probably don't want to use the full struct inode as a key anyway.
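A minimal sketch of that fix for your first program (the map key is the pointer value itself, so it just needs to sit on the BPF stack; only the lookup changes):

SEC("fexit/vfs_unlink")
int BPF_PROG(vfs_unlink_exit, const struct user_namespace *mnt_userns,
             const struct inode *dir, const struct dentry *dentry,
             const struct inode **delegated_inode, int ret)
{
    // The scalar returned by BPF_CORE_READ() becomes a stack variable...
    struct inode *key = BPF_CORE_READ(dentry, d_inode);
    // ...and &key is a pointer to the stack (fp), which the verifier accepts.
    struct value *val = bpf_map_lookup_elem(&info, &key);
    if (val == NULL)
    {
        bpf_printk("not handling");
        return 0;
    }
    return 0;
}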

STM32F103C8T6 cannot communicate with HD44780

I'm trying to control an HD44780 16x2 LCD (4-bit communication) using an STM32F103C8T6.
I connected the LCD this way:
RS => PA0
EN => PA2
RW => Ground
D7 => PB7
D6 => PB6
D5 => PB5
D4 => PB4
The LCD doesn't display anything. Where might the problem be? Does anyone know something about this issue?
Here is my code:
#include "delay.h"
#include "stm32f10x.h" // Device header
void lcd_command(unsigned char command);
void lcd_init(void);
void lcdPosition(unsigned int row, unsigned int column);
void lcd_data(unsigned char data);
int main() {
delay_init();
RCC->APB2ENR |= 1 << 2; // Port A Enabled.
RCC->APB2ENR |= 1 << 3; // Port B Enabled.
GPIOA->CRL = 0x22222222; // A0 and A2 Output.
GPIOB->CRL = 0x22222222; // B7,B6,B5,B4 Output.
GPIOB->ODR = 0x00; // Port B clear.
delay_ms(20);
lcd_command(0x30); // Datasheet says.
delay_ms(5);
lcd_command(0x30); // Datasheet says.
delay_ms(1);
lcd_command(0x30); // Datasheet says.
delay_ms(1);
lcd_init();
lcdPosition(1, 1); // first row first column
delay_ms(1);
lcd_data('A'); // Letter A
while (1)
;
}
void lcd_command(unsigned char command) {
GPIOA->BRR |= 1 << 0; // RS reset.
GPIOA->BSRR |= 1 << 2; // E set.
GPIOB->ODR = command; // upper nibble
delay_ms(2); // delay
GPIOA->BRR |= 1 << 2; // E reset.
GPIOB->BRR = 0x000000F0; // clear data bits
delay_ms(2); // delay
command = command << 4; // lower nibble
GPIOA->BSRR |= 1 << 2; // E set.
GPIOB->ODR = command; // lower nibble
delay_ms(2); // delay
GPIOA->BRR |= 1 << 2; // E reset.
GPIOB->BRR = 0x000000FF; // clear data bits
}
void lcd_init() {
lcd_command(0x02); // Return
delay_ms(2); // delay
lcd_command(0x28);
set 4 - bit data, 2 - line, 5x7 font delay_ms(2); // delay
lcd_command(0x0C);
turn on display, cursor off.delay_ms(2); // delay
lcd_command(0x01); // Clear.
delay_ms(2); // delay
lcd_command(0x06);
move cursor right delay_ms(4); // delay
}
void lcdPosition(unsigned int row, unsigned int column) {
if (row == 1) {
column--;
lcd_command(0x80 + column); // Define row
} else if (row == 2) {
column--;
lcd_command(0xC0 + column); // Define column
}
}
void lcd_data(unsigned char data) {
GPIOA->BSRR |= 1 << 0; // RS reset.
GPIOA->BSRR |= 1 << 2; // E set.
GPIOB->ODR = data;
upper nibble first delay_ms(4); // delay
GPIOA->BRR |= 1 << 2; // E reset.
GPIOB->BRR = 0x000000F0; // clear data bits
delay_ms(4); // delay
data = data << 4; // lower nibble
GPIOA->BSRR |= 1 << 2; // E set.
GPIOB->ODR = data; // lower nibble
delay_ms(4); // delay
GPIOA->BRR |= 1 << 2; // E reset.
GPIOB->BRR = 0x000000FF; // clear data bits
}
Also, PA0 and PA2 are not 5 V tolerant pins, and the HD44780 is usually a 5 V device. If it runs on +5 V, refer to Table 5 ("Medium-density STM32F103xx pin definitions") in the datasheet: a pin that is 5 V tolerant is denoted with FT in the I/O level column.
Please read carefully how to initialize the 4-bit interface in the HD44780 datasheet, page 46, figure 24.
In short: your lcd_command sends two nibbles one after the other, but you are required to send only one nibble for the first several commands (since the display controller is still in 8-bit mode). You'll need a separate function that sends only one nibble, as sketched below.
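A minimal sketch of such a function, reusing the pin assignments and delay routines from your code (the name lcd_nibble is mine):

// Send a single (upper) nibble: used during the 4-bit init sequence,
// while the controller is still in 8-bit mode.
void lcd_nibble(unsigned char nibble) {
    GPIOA->BRR = 1 << 0;        // RS = 0: command
    GPIOA->BSRR = 1 << 2;       // E = 1
    GPIOB->ODR = nibble & 0xF0; // put only the upper nibble on D7..D4
    delay_ms(2);
    GPIOA->BRR = 1 << 2;        // E = 0, the controller latches on the falling edge
    GPIOB->BRR = 0x000000F0;    // clear the data bits
    delay_ms(2);
}

The three 0x30 writes of the wake-up sequence, and the 0x20 that actually switches the controller to 4-bit mode, would then go through lcd_nibble() instead of lcd_command().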

Creating a packet as a string and then extracting its fields in C

I need to implement my own packets to send over UDP. I decided to do this by sending a char buffer which contains the sequence number, checksum, size, and the data of the packet, which is bytes from a file. The string I'm sending separates each field with a semicolon. Then, when I receive the string (which is my packet), I want to extract each field, use them accordingly (the sequence number, size, and checksum), and write the bytes to a file. So far I have written a method to create 100 packets, and I'm trying to extract and write the bytes to a file (I'm not doing it in the receiver yet; first I'm testing the parsing in the sender). For some reason, the bytes written to my file are incorrect and I'm getting a "JPEG datastream contains no image" error when I try to open it.
struct packetNode {
    char packet[1052]; // this is the entire packet data including the header
    struct packetNode *next;
};
This is how I'm creating my packets:
// populate initial window of size 100
for (i = 0; i < 100; i++) {
    memset(&data[0], 0, sizeof(data));
    struct packetNode *p; // create packet node
    p = (struct packetNode *)malloc(sizeof(struct packetNode));
    bytes = fread(data, 1, sizeof(data), fp); // read 1024 bytes from file into data buffer
    int b = fwrite(data, 1, bytes, fpNew);
    printf("read: %d\n", bytes);
    memset(&p->packet[0], 0, sizeof(p->packet));
    sprintf(p->packet, "%d;%d;%d;%s", s, 0, numPackets, data); // create packet
    // calculate checksum
    int check = checksum8(p->packet, sizeof(p->packet));
    sprintf(p->packet, "%d;%d;%d;%s", s, check, numPackets, data); // put checksum in packet
    s++; // increment sequence number
    if (i == 0) {
        head = p;
        tail = p;
        tail->next = NULL;
    }
    else {
        tail->next = p;
        tail = p;
        tail->next = NULL;
    }
}
fclose(fp);
and this is where I parse and write the bytes to a file:
void test() {
    FILE *fpNew = fopen("test.jpg", "w");
    struct packetNode *ptr = head;
    char *tokens;
    int s, c, size;
    int i = 0;
    char data[1024];
    while (ptr != NULL) {
        memset(&data[0], 0, sizeof(data));
        tokens = strtok(ptr->packet, ";");
        s = atoi(tokens);
        tokens = strtok(NULL, ";");
        c = atoi(tokens);
        tokens = strtok(NULL, ";");
        size = atoi(tokens);
        tokens = strtok(NULL, ";");
        if (tokens != NULL)
            strcpy(data, tokens);
        printf("sequence: %d, checksum: %d, size: %d\n", s, c, size);
        int b = fwrite(data, 1, sizeof(data), fpNew);
        ptr = ptr->next;
        i++;
    }
    fclose(fpNew);
}
Since you are transferring binary data (a JPEG stream), the data cannot be treated as a string. It's better to go all binary. For instance, instead of
sprintf(p->packet, "%d;%d;%d;%s", s, 0, numPackets, data); // create packet
you would do
sprintf(p->packet, "%d;%d;%d;", s, 0, numPackets);
memcpy(&p->packet[strlen(p->packet)], data, bytes);
but this leads to parsing problems: we would need to change this:
tokens = strtok(NULL, ";");
if(tokens != NULL)
strcpy(data, tokens);
to something like this:
tokens += 1 + ( size < 10 ? 1 : size < 100 ? 2 : size < 1000 ? 3 : size < 10000 ? 4 : 5 );
memcpy(data, tokens, sizeof(data));
Binary Protocol
It's easier to use a binary packet:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma pack(push, 1)
typedef struct Packet {
    int seq, maxseq, size;
    unsigned short cksum;
    unsigned char payload[];
} Packet;
#pragma pack(pop)

typedef struct PacketNode {
    struct PacketNode *next;
    Packet packet;
} PacketNode;

int checksum(unsigned char *data, int len);

PacketNode *allocPacketNode(int maxPayloadSize) {
    void *ptr = malloc(sizeof(PacketNode) + maxPayloadSize); // FIXME: error checking
    memset(ptr, 0, sizeof(PacketNode) + maxPayloadSize);     // mallocz wouldn't cooperate
    return (PacketNode *)ptr;
}

PacketNode *prepare(FILE *fp, int fsize, int chunksize)
{
    PacketNode *head = allocPacketNode(chunksize);
    PacketNode *pn = head;
    int rd, seq = 0;
    int maxseq = fsize / chunksize + (fsize % chunksize ? 1 : 0);
    while ((rd = fread(pn->packet.payload, 1, chunksize, fp)) > 0)
    {
        printf("read %d bytes\n", rd);
        pn->packet.seq = seq++;
        pn->packet.maxseq = maxseq;
        pn->packet.size = rd + sizeof(Packet);
        pn->packet.cksum = 0;
        pn->packet.cksum = ~checksum((unsigned char *)&pn->packet, pn->packet.size);
        if (rd == chunksize)
            pn = pn->next = allocPacketNode(chunksize);
    }
    return head;
}

int checksum(unsigned char *data, int len)
{
    int sum = 0, i;
    for (i = 0; i < len; i++)
        sum += data[i];
    if (sum > 0xffff)
        sum = (sum & 0xffff) + (sum >> 16);
    return sum;
}

void test(PacketNode *ptr) {
    FILE *fpNew = fopen("test.jpg", "wb");
    while (ptr != NULL)
    {
        printf("sequence: %d/%d, checksum: %04x, size: %d\n",
               ptr->packet.seq,
               ptr->packet.maxseq,
               ptr->packet.cksum,
               (int)(ptr->packet.size - sizeof(Packet)));
        int b = fwrite(ptr->packet.payload, ptr->packet.size - sizeof(Packet), 1, fpNew);
        ptr = ptr->next;
    }
    fclose(fpNew);
}

void fatal(const char *msg) { printf("FATAL: %s\n", msg); exit(1); }

int main(int argc, char **argv)
{
    if (!argv[1]) fatal("missing filename argument");
    FILE *fp = fopen(argv[1], "rb");
    if (!fp) fatal("cannot open file");
    fseek(fp, 0, SEEK_END);
    long fsize = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    printf("Filesize: %ld\n", fsize);
    test(prepare(fp, fsize, 1024));
}
The #pragma pack(push, 1) changes how the compiler aligns the fields of the struct. We want them to be compact, for network transport; a packing of 1 means byte-aligned, with no padding inserted between fields. The #pragma pack(pop) restores the previous setting of the pack pragma.
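A quick way to check the effect of the packing (a sketch; it assumes the packed Packet definition above is in scope):

#include <stdio.h>

int main(void)
{
    // 3 * sizeof(int) + sizeof(unsigned short) = 14 bytes; the flexible
    // payload[] member adds nothing. Without pack(1), the struct would be
    // padded up to 16 bytes on a typical 4-byte-aligned target.
    printf("sizeof(Packet) = %zu\n", sizeof(Packet));
    return 0;
}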
A note on the checksum method
First we calculate the sum of all the bytes in the packet:
int sum = 0, i;
for ( i = 0; i < len; i ++ )
sum += data[i];
Since the packet uses an unsigned short (16 bits, max value 65535 or 0xffff) to store the checksum, we make sure that the result will fit:
if ( sum > 0xffff ) // takes up more than 16 bits.
Getting the low 16 bits of this int is done using sum & 0xffff, masking out everything but the low 16 bits. We could simply return this value, but we would lose the information from the higher checksum bits. So we add the upper 16 bits to the lower 16 bits. Accessing the higher 16 bits is done by shifting the int right by 16 bits, like so: sum >> 16. This is the same as sum / 65536, since 65536 = 2^16 = 1 << 16.
sum = (sum & 0xffff) + (sum>>16); // add low 16 bits and high 16 bits
I should note that network packet checksums are usually computed 2 bytes (or 'octets' as they like to call them there) at a time. For that, the data should be cast to an unsigned short *, and len should be divided by 2. However! len may be odd, so in that case we'll need to take special care of the last byte. For instance, assuming that the maximum packet size is even, and that the len argument is always <= max_packet_size:
unsigned short * in = (unsigned short *) data;
if ( len & 1 ) data[len] = 0; // make sure last byte is 0
len = (len + 1) / 2;
The rest of the checksum method can remain the same, except that it should operate on in instead of data.
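Putting those pieces together, a sketch of the 16-bit variant (the name checksum16 and the assumption that the buffer has room for one padding byte are mine):

int checksum16(unsigned char *data, int len)
{
    unsigned short *in = (unsigned short *)data;
    int sum = 0, i;
    if (len & 1)
        data[len] = 0;    // zero the pad byte; assumes the buffer holds at least len + 1 bytes
    len = (len + 1) / 2;  // number of 16-bit words
    for (i = 0; i < len; i++)
        sum += in[i];
    if (sum > 0xffff)
        sum = (sum & 0xffff) + (sum >> 16);
    return sum;
}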