From 4c5e8c8e977af9f60606bcac943827546df564cf Mon Sep 17 00:00:00 2001 From: Littlefisher619 Date: Sun, 5 Feb 2023 02:54:05 +0800 Subject: [PATCH] clean up comments and add desc --- examples/bootstrap/README.md | 65 ++++++++++++++----- examples/bootstrap/bootstrap.c | 5 -- examples/runqlat/README.md | 115 +++++++++++++++++++++++++++++++-- examples/runqlat/runqlat.c | 97 --------------------------- 4 files changed, 156 insertions(+), 126 deletions(-) diff --git a/examples/bootstrap/README.md b/examples/bootstrap/README.md index 9f534d2..557f7a1 100644 --- a/examples/bootstrap/README.md +++ b/examples/bootstrap/README.md @@ -54,6 +54,8 @@ TIME EVENT COMM PID PPID FILENAME/EXIT CODE 18:57:59 EXEC sleep 74916 74910 /usr/bin/sleep ``` +The original C code is from [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap). + ## the compile process of the bootstrap.wasm We can provide a similar developing experience as the [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap) development. Just run `make` to build the wasm binary: @@ -70,6 +72,8 @@ This would invoke the following steps: clang -g -O2 -target bpf -D__TARGET_ARCH_x86 -I../../third_party/vmlinux/x86/ -idirafter /usr/lib/llvm-15/lib/clang/15.0.2/include -idirafter /usr/local/include -idirafter /usr/include/x86_64-linux-gnu -idirafter /usr/include -c bootstrap.bpf.c -o bootstrap.bpf.o ``` + The kernel part of the BPF program is exactly the same as with libbpf (or any other style that can be compiled by clang). The BCC style can be compiled in this way once the [bcc to libbpf converter](https://github.com/iovisor/bcc/issues/4404) is completed. + - generate the C header file from the BPF program: ```sh @@ -127,27 +131,54 @@ This would invoke the following steps: > > The `ecc` compiler in eunomia-bpf will use libclang and llvm to find all struct definitions in the header file, and automatically add more btf info to the ebpf object. 
The original `clang` may not always generate enough btf info for the wasm-bpf tool to generate the correct C header file. > - > **Note: This process and tools is not always required, you can do that mannually.** You can mannually write all event structs definitions with `__attribute__((packed))` to avoid padding bytes, and convert all pointer to correct integers between the host and the wasm side. All types must be defined the same size and layout as the host side in wasm as well. This would be easy for simple events, but it would be hard for complex programs, so we create the wasm specific `bpftool` to generate the C header file for the user space code contains all type defines and correct struct layout. + > **Note: This process and these tools are not always required; you can do this manually.** You can manually write all event struct definitions with `__attribute__((packed))` to avoid padding bytes, and convert all pointers to the correct integer types between the host and the wasm side. All types must be defined in wasm with the same size and layout as on the host side. This is easy for simple events but hard for complex programs, so we created the wasm-specific `bpftool` to generate, from the `BTF` info, a C header file for the user space code that contains all type definitions with the correct struct layout. > + > We have created a special POC tool outside of `bpftool` for generating serialization-free C struct bindings between the eBPF/host side and Wasm; you can find it in [c-struct-bindgen](https://github.com/eunomia-bpf/c-struct-bindgen). More details about how to deal with the struct layout issue can be found in the README of the c-struct-bindgen tool. The libbpf API for wasm program is provided as an header only library, you can find it in `libbpf-wasm.h` (wasm-include/libbpf-wasm.h). 
The wasm program can use the libbpf API and syscall to operate the BPF object, for example: - ```c + ```c + /* Load and verify BPF application */ + skel = bootstrap_bpf__open(); + /* Parameterize BPF code with minimum duration parameter */ + skel->rodata->min_duration_ns = env.min_duration_ms * 1000000ULL; + /* Load & verify BPF programs */ + err = bootstrap_bpf__load(skel); + /* Attach tracepoints */ + err = bootstrap_bpf__attach(skel); + ``` + The `rodata` section is used to store the global variables in the BPF program, and the `bss` section is used to store the global variables in the user space code, which will be memory mapped to the correct offset at the `bpftool gen skeleton` time, so libelf library is not required to be compiled in Wasm and the runtime can still dynamically load and operate the BPF object. -## run + The wasm side C code will be slightly different from the native libbpf code, but it provides most of the abilities of the eBPF side, for example, polling from the ring buffer or perf buffer, accessing the map from both the Wasm side and eBPF side, loading, attaching and detaching BPF programs, etc. It can support a large number of eBPF program types and maps, covering the use cases of most eBPF programs from tracing, networking, security, etc. 
-```console -$ sudo ./wasm-bpf bootstrap.wasm -TIME EVENT COMM PID PPID FILENAME/EXIT CODE -18:57:58 EXEC sed 74911 74910 /usr/bin/sed -18:57:58 EXIT sed 74911 74910 [0] (2ms) -18:57:58 EXIT cat 74912 74910 [0] (0ms) -18:57:58 EXEC cat 74913 74910 /usr/bin/cat -18:57:59 EXIT cat 74913 74910 [0] (0ms) -18:57:59 EXEC cat 74914 74910 /usr/bin/cat -18:57:59 EXIT cat 74914 74910 [0] (0ms) -18:57:59 EXEC cat 74915 74910 /usr/bin/cat -18:57:59 EXIT cat 74915 74910 [0] (1ms) -18:57:59 EXEC sleep 74916 74910 /usr/bin/sleep -``` + Because some features are missing on the wasm side (for example, signal handlers are not supported yet as of 2023/2), the original C code cannot be compiled to wasm directly; you need to modify the code slightly to make it work. We would try our best to make the wasm side libbpf API as close as possible to the native libbpf API, so maybe the user space code can be compiled to wasm directly in the future. More language bindings (Rust, Go, etc.) for the wasm side bpf API will also be provided soon. + + The polling API is a wrapper for both the ring buffer and the perf buffer, and the user space code can use the same API to poll events from either one, depending on the type specified in the BPF program. For example, a ring buffer polling for a map defined as `BPF_MAP_TYPE_RINGBUF`: + + ```c + struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); + } rb SEC(".maps"); + ``` + + You can use the following code to poll events from the ring buffer: + + ```c + rb = bpf_buffer__open(skel->maps.rb, handle_event, NULL); + /* Process events */ + printf("%-8s %-5s %-16s %-7s %-7s %s\n", "TIME", "EVENT", "COMM", "PID", + "PPID", "FILENAME/EXIT CODE"); + while (!exiting) { + // poll buffer + err = bpf_buffer__poll(rb, 100 /* timeout, ms */); + ``` + + No serialization overhead is required for the ring buffer polling. The `bpf_buffer__poll` API will call the `handle_event` function to process the event data in the ring buffer. 
+ + The runtime is built on top of the libbpf [CO-RE](https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html) (Compile Once – Run Everywhere) API to load the bpf object into the kernel, so a wasm-bpf program will not rely on the kernel version where it's built, and can be run on any kernel version with BPF CO-RE support. + + The size of `bootstrap.wasm` would be only `~90K`. It would be very easy to distribute over the network, and it can be dynamically deployed, loaded and run on another machine in less than `100ms`. No `kernel header`, `LLVM`, `clang` dependency is required at the runtime, and there is no need to do the heavy compilation work! + + For a more complex example, you can find the [runqlat](../runqlat/) program in the `examples` directory. diff --git a/examples/bootstrap/bootstrap.c b/examples/bootstrap/bootstrap.c index 94a5259..53b07f4 100644 --- a/examples/bootstrap/bootstrap.c +++ b/examples/bootstrap/bootstrap.c @@ -103,11 +103,6 @@ main(int argc, char **argv) fprintf(stderr, "Failed to create ring buffer\n"); goto cleanup; } - // instert map - int fd = bpf_map__fd(skel->maps.exec_start); - int lookup_key = 1, next_key; - uint64_t value; - bpf_map_update_elem(fd, &lookup_key, &value, BPF_ANY); /* Process events */ printf("%-8s %-5s %-16s %-7s %-7s %s\n", "TIME", "EVENT", "COMM", "PID", "PPID", "FILENAME/EXIT CODE"); diff --git a/examples/runqlat/README.md b/examples/runqlat/README.md index 1201538..738d348 100644 --- a/examples/runqlat/README.md +++ b/examples/runqlat/README.md @@ -1,15 +1,69 @@ -# examples +# Demo BPF applications -## build +## runqlat -```sh -make +Demonstrations of runqlat, the Linux eBPF/bcc version. + +This program summarizes scheduler run queue latency as a histogram, showing +how long tasks spent waiting their turn to run on-CPU. + +```console +$ sudo ./wasm-bpf runqlat.wasm -h +Summarize run queue (scheduler) latency as a histogram. 
+ +USAGE: runqlat [--help] [interval] [count] + +EXAMPLES: + runqlat # summarize run queue latency as a histogram + runqlat 1 10 # print 1 second summaries, 10 times +$ sudo ./wasm-bpf runqlat.wasm 1 + +Tracing run queue latency... Hit Ctrl-C to end. + + usecs : count distribution + 0 -> 1 : 72 |***************************** | + 2 -> 3 : 93 |************************************* | + 4 -> 7 : 98 |****************************************| + 8 -> 15 : 96 |*************************************** | + 16 -> 31 : 38 |*************** | + 32 -> 63 : 4 |* | + 64 -> 127 : 5 |** | + 128 -> 255 : 6 |** | + 256 -> 511 : 0 | | + 512 -> 1023 : 0 | | + 1024 -> 2047 : 0 | | + 2048 -> 4095 : 1 | | ``` -## run +The distribution is bimodal, with one mode between 0 and 15 microseconds, +and another between 16 and 65 milliseconds. These modes are visible as the +spikes in the ASCII distribution (which is merely a visual representation +of the "count" column). As an example of reading one line: 809 events fell +into the 16384 to 32767 microsecond range (16 to 32 ms) while tracing. + +`runqlat` is also an example of a simple (but realistic) BPF application. It +shows a more complex example of a BPF program, which contains more than +one file, and directly accesses the kernel maps from the user space instead of +polling the kernel ring buffer. + +`bootstrap` is intended to be the starting point for your own BPF application, +with things like BPF CO-RE and vmlinux.h, consuming BPF ring buffer data, +command line arguments parsing, graceful Ctrl-C handling, etc. all taken care +of for you, which are crucial but mundane tasks that are no fun, but necessary +to be able to do anything useful. Just copy/paste and do simple renaming to get +yourself started. + +Here's an example output in minimum process duration mode: ```console -$ sudo wasm-runtime/build/bin/Release/wasm-bpf bootstrap.wasm +$ sudo ./wasm-bpf bootstrap.wasm -h +BPF bootstrap demo application. 
+ +It traces process start and exits and shows associated +information (filename, process duration, PID and PPID, etc). + +USAGE: ./bootstrap [-d <min-duration-ms>] -v +$ sudo ./wasm-bpf bootstrap.wasm TIME EVENT COMM PID PPID FILENAME/EXIT CODE 18:57:58 EXEC sed 74911 74910 /usr/bin/sed 18:57:58 EXIT sed 74911 74910 [0] (2ms) @@ -21,4 +75,51 @@ TIME EVENT COMM PID PPID FILENAME/EXIT CODE 18:57:59 EXEC cat 74915 74910 /usr/bin/cat 18:57:59 EXIT cat 74915 74910 [0] (1ms) 18:57:59 EXEC sleep 74916 74910 /usr/bin/sleep -``` \ No newline at end of file +``` + +The original C code is from [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap). + +## the compile process of the bootstrap.wasm + +We can provide a similar developing experience as the [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap) development. Just run `make` to build the wasm binary: + +```sh +make +``` + +For a description of the build process and the issues that may occur, please refer to the [bootstrap/README.md](../bootstrap/README.md). + +## The maps API + +You can use the map API to access the kernel maps from the user space, for example: + +```c + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + err = bpf_map_lookup_elem(fd, &next_key, &hist); + ... + lookup_key = next_key; + } + lookup_key = -2; + while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) { + err = bpf_map_delete_elem(fd, &next_key); + ... + lookup_key = next_key; + } +``` + +The runtime would use shared memory to access the kernel maps, and the kernel would update the maps in the shared memory, so the wasm code can access the maps directly, without any serialization or copy overhead between host and Wasm runtime. + +You can use the `bpf_map_update_elem` API to update the kernel maps from the user space, for example: + +```c + cg_map_fd = bpf_map__fd(obj->maps.cgroup_map); + cgfd = open(env.cgroupspath, O_RDONLY); + if (cgfd < 0) { + ... + } + if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) { + ... 
+ } +``` + +So the kernel eBPF program can be configured from the wasm side, or receive data from the userspace wasm runtime, while it is running. diff --git a/examples/runqlat/runqlat.c b/examples/runqlat/runqlat.c index c9827ef..f7a7ff2 100644 --- a/examples/runqlat/runqlat.c +++ b/examples/runqlat/runqlat.c @@ -11,7 +11,6 @@ #include #include #include -// #include "vmlinux.h" #include "runqlat.h" #include "runqlat.skel.h" #include "trace_helpers.h" @@ -60,84 +59,6 @@ print_usage(void) printf("%s\n", argp_program_doc); } -// static const struct argp_option opts[] = { -// { "timestamp", 'T', NULL, 0, "Include timestamp on output" }, -// { "milliseconds", 'm', NULL, 0, "Millisecond histogram" }, -// { "pidnss", OPT_PIDNSS, NULL, 0, "Print a histogram per PID namespace" }, -// { "pids", 'P', NULL, 0, "Print a histogram per process ID" }, -// { "tids", 'L', NULL, 0, "Print a histogram per thread ID" }, -// { "pid", 'p', "PID", 0, "Trace this PID only" }, -// { "verbose", 'v', NULL, 0, "Verbose debug output" }, -// { "cgroup", 'c', "/sys/fs/cgroup/unified", 0, "Trace process in cgroup path"}, -// { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, -// {}, -// }; - -// static error_t parse_arg(int key, char *arg, struct argp_state *state) -// { -// static int pos_args; - -// switch (key) { -// case 'h': -// argp_state_help(state, stderr, ARGP_HELP_STD_HELP); -// break; -// case 'v': -// env.verbose = true; -// break; -// case 'm': -// env.milliseconds = true; -// break; -// case 'p': -// errno = 0; -// env.pid = strtol(arg, NULL, 10); -// if (errno) { -// fprintf(stderr, "invalid PID: %s\n", arg); -// argp_usage(state); -// } -// break; -// case 'L': -// env.per_thread = true; -// break; -// case 'P': -// env.per_process = true; -// break; -// case OPT_PIDNSS: -// env.per_pidns = true; -// break; -// case 'T': -// env.timestamp = true; -// break; -// case 'c': -// env.cgroupspath = arg; -// env.cg = true; -// break; -// case ARGP_KEY_ARG: -// errno = 0; -// if (pos_args == 0) { -// 
env.interval = strtol(arg, NULL, 10); -// if (errno) { -// fprintf(stderr, "invalid internal\n"); -// argp_usage(state); -// } -// } else if (pos_args == 1) { -// env.times = strtol(arg, NULL, 10); -// if (errno) { -// fprintf(stderr, "invalid times\n"); -// argp_usage(state); -// } -// } else { -// fprintf(stderr, -// "unrecognized positional argument: %s\n", arg); -// argp_usage(state); -// } -// pos_args++; -// break; -// default: -// return ARGP_ERR_UNKNOWN; -// } -// return 0; -// } - static void sig_handler(int sig) { exiting = true; @@ -180,11 +101,6 @@ static int print_log2_hists(struct bpf_map *hists) int main(int argc, char **argv) { - // static const struct argp argp = { - // .options = opts, - // .parser = parse_arg, - // .doc = argp_program_doc, - // }; struct runqlat_bpf *obj; struct tm *tm; char ts[32]; @@ -193,9 +109,6 @@ int main(int argc, char **argv) int idx, cg_map_fd; int cgfd = -1; - // err = argp_parse(&argp, argc, argv, 0, NULL, NULL); - // if (err) - // return err; if (argc > 3 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0) { print_usage(); return 0; @@ -226,16 +139,6 @@ int main(int argc, char **argv) obj->rodata->targ_tgid = env.pid; obj->rodata->filter_cg = env.cg; - // if (probe_tp_btf("sched_wakeup")) { - // bpf_program__set_autoload(obj->progs.handle_sched_wakeup, false); - // bpf_program__set_autoload(obj->progs.handle_sched_wakeup_new, false); - // bpf_program__set_autoload(obj->progs.handle_sched_switch, false); - // } else { - // // bpf_program__set_autoload(obj->progs.sched_wakeup, false); - // // bpf_program__set_autoload(obj->progs.sched_wakeup_new, false); - // // bpf_program__set_autoload(obj->progs.sched_switch, false); - // } - err = runqlat_bpf__load(obj); if (err) { fprintf(stderr, "failed to load BPF object: %d\n", err);