From 2c2d46f00a83380e1571bc86a2b4d25ad2eb8282 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 16 Nov 2017 17:14:38 +0530 Subject: [PATCH] Make 'tools/wakeuptime' use stack maps This modifies 'tools/wakeuptime' to use the stack maps feature. Rather than using manual, depth-limited stack walking with some architecture-specific code, using stack maps makes the script usable across different architectures. This also adds command line arguments to let the user set the minimum and maximum amount of time in microseconds over which traces are stored. Signed-off-by: Sandipan Das --- tools/wakeuptime.py | 150 ++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 76 deletions(-) diff --git a/tools/wakeuptime.py b/tools/wakeuptime.py index e09840aa411d..9ca9c3f9fc4e 100755 --- a/tools/wakeuptime.py +++ b/tools/wakeuptime.py @@ -5,12 +5,6 @@ # # USAGE: wakeuptime [-h] [-u] [-p PID] [-v] [-f] [duration] # -# The current implementation uses an unrolled loop for x86_64, and was written -# as a proof of concept. This implementation should be replaced in the future -# with an appropriate bpf_ call, when available. -# -# Currently limited to a stack trace depth of 21 (maxdepth + 1). -# # Copyright 2016 Netflix, Inc. 
# Licensed under the Apache License, Version 2.0 (the "License") # @@ -21,6 +15,25 @@ from time import sleep, strftime import argparse import signal +import errno +from sys import stderr + +# arg validation +def positive_int(val): + try: + ival = int(val) + except ValueError: + raise argparse.ArgumentTypeError("must be an integer") + + if ival < 0: + raise argparse.ArgumentTypeError("must be positive") + return ival + +def positive_nonzero_int(val): + ival = positive_int(val) + if ival == 0: + raise argparse.ArgumentTypeError("must be nonzero") + return ival # arguments examples = """examples: @@ -37,21 +50,33 @@ parser.add_argument("-u", "--useronly", action="store_true", help="user threads only (no kernel threads)") parser.add_argument("-p", "--pid", + type=positive_int, help="trace this PID only") parser.add_argument("-v", "--verbose", action="store_true", help="show raw addresses") parser.add_argument("-f", "--folded", action="store_true", help="output folded format") +parser.add_argument("--stack-storage-size", default=1024, + type=positive_nonzero_int, + help="the number of unique stack traces that can be stored and " + "displayed (default 1024)") parser.add_argument("duration", nargs="?", default=99999999, + type=positive_nonzero_int, help="duration of trace, in seconds") +parser.add_argument("-m", "--min-block-time", default=1, + type=positive_nonzero_int, + help="the amount of time in microseconds over which we " + + "store traces (default 1)") +parser.add_argument("-M", "--max-block-time", default=(1 << 64) - 1, + type=positive_nonzero_int, + help="the amount of time in microseconds under which we " + + "store traces (default U64_MAX)") args = parser.parse_args() folded = args.folded duration = int(args.duration) debug = 0 -maxdepth = 20 # and MAXDEPTH if args.pid and args.useronly: - print("ERROR: use either -p or -u.") - exit() + parser.error("use either -p or -u.") # signal handler def signal_ignore(signal, frame): @@ -62,38 +87,17 @@ def 
signal_ignore(signal, frame): #include #include -#define MAXDEPTH 20 -#define MINBLOCK_US 1 +#define MINBLOCK_US MINBLOCK_US_VALUEULL +#define MAXBLOCK_US MAXBLOCK_US_VALUEULL struct key_t { + int w_k_stack_id; char waker[TASK_COMM_LEN]; char target[TASK_COMM_LEN]; - // Skip saving the ip - u64 ret[MAXDEPTH]; }; BPF_HASH(counts, struct key_t); BPF_HASH(start, u32); - -static u64 get_frame(u64 *bp) { - if (*bp) { - // The following stack walker is x86_64/arm64 specific - u64 ret = 0; - if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8))) - return 0; - if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp)) - return 0; -#ifdef __x86_64__ - if (ret < __START_KERNEL_map) -#elif __aarch64__ - if (ret < VA_START) -#else -#error "Unsupported architecture for stack walker" -#endif - return 0; - return ret; - } - return 0; -} +BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE) int offcpu(struct pt_regs *ctx) { u32 pid = bpf_get_current_pid_tgid(); @@ -118,41 +122,16 @@ def signal_ignore(signal, frame): // calculate delta time delta = bpf_ktime_get_ns() - *tsp; delta = delta / 1000; - if (delta < MINBLOCK_US) + if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) return 0; struct key_t key = {}; - u64 zero = 0, *val, bp = 0; - int depth = 0; + u64 zero = 0, *val; + key.w_k_stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID); bpf_probe_read(&key.target, sizeof(key.target), p->comm); bpf_get_current_comm(&key.waker, sizeof(key.waker)); - bp = PT_REGS_FP(ctx); - - // unrolled loop (MAXDEPTH): - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - 
if (!(key.ret[depth++] = get_frame(&bp))) goto out; - - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - if (!(key.ret[depth++] = get_frame(&bp))) goto out; - -out: + val = counts.lookup_or_init(&key, &zero); (*val) += delta; return 0; @@ -165,6 +144,12 @@ def signal_ignore(signal, frame): else: filter = '0' bpf_text = bpf_text.replace('FILTER', filter) + +# set stack storage size +bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) +bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time)) +bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time)) + if debug: print(bpf_text) @@ -195,30 +180,43 @@ def signal_ignore(signal, frame): if not folded: print() + missing_stacks = 0 + has_enomem = False counts = b.get_table("counts") + stack_traces = b.get_table("stack_traces") for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): + # handle get_stackid errors + # check for an ENOMEM error + if k.w_k_stack_id == -errno.ENOMEM: + missing_stacks += 1 + continue + + waker_kernel_stack = [] if k.w_k_stack_id < 1 else \ + reversed(list(stack_traces.walk(k.w_k_stack_id))[1:]) + if folded: # print folded stack output - line = k.waker.decode() + ";" - for i in reversed(range(0, maxdepth)): - if k.ret[i] == 0: - continue - line = line + b.ksym(k.ret[i]) - if i != 0: - line = line + ";" - print("%s;%s %d" % (line, k.target.decode(), v.value)) + line = \ + [k.waker.decode()] + \ + [b.ksym(addr) + for addr in reversed(list(waker_kernel_stack))] + \ + [k.target.decode()] + 
print("%s %d" % (";".join(line), v.value)) else: # print default multi-line stack output print(" %-16s %s" % ("target:", k.target.decode())) - for i in range(0, maxdepth): - if k.ret[i] == 0: - break - print(" %-16x %s" % (k.ret[i], - b.ksym(k.ret[i]))) + for addr in waker_kernel_stack: + print(" %-16x %s" % (addr, b.ksym(addr))) print(" %-16s %s" % ("waker:", k.waker.decode())) print(" %d\n" % v.value) counts.clear() + if missing_stacks > 0: + enomem_str = " Consider increasing --stack-storage-size." + print("WARNING: %d stack traces could not be displayed.%s" % + (missing_stacks, enomem_str), + file=stderr) + if not folded: print("Detaching...") exit()