forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 1
/
biosnoop
executable file
·132 lines (109 loc) · 3.18 KB
/
biosnoop
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/python
#
# biosnoop Trace block device I/O and print details including issuing PID.
# For Linux, uses BCC, eBPF.
#
# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O
# request, as well as a starting timestamp for calculating I/O latency.
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 16-Sep-2015 Brendan Gregg Created this.
from __future__ import print_function
from bcc import BPF
# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
struct val_t {
char name[TASK_COMM_LEN];
};
BPF_HASH(start, struct request *);
BPF_HASH(pidbyreq, struct request *, u32);
BPF_HASH(commbyreq, struct request *, struct val_t);
// cache PID and comm by-req
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
u32 pid;
struct val_t val = {};
pid = bpf_get_current_pid_tgid();
pidbyreq.update(&req, &pid);
if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
commbyreq.update(&req, &val);
}
return 0;
}
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
u64 ts;
ts = bpf_ktime_get_ns();
start.update(&req, &ts);
return 0;
}
// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
u64 *tsp, delta;
u32 *pidp = 0;
struct val_t *valp;
// fetch timestamp and calculate delta
tsp = start.lookup(&req);
if (tsp == 0) {
// missed tracing issue
return 0;
}
delta = bpf_ktime_get_ns() - *tsp;
//
// Fetch and output issuing pid and comm.
// As bpf_trace_prink() is limited to a maximum of 1 string and 2
// integers, we'll use more than one to output the data.
//
valp = commbyreq.lookup(&req);
pidp = pidbyreq.lookup(&req);
if (pidp == 0 || valp == 0) {
bpf_trace_printk("0 0 ? %d\\n", req->__data_len);
} else {
bpf_trace_printk("0 %d %s %d\\n", *pidp, valp->name,
req->__data_len);
}
// output remaining details
if (req->cmd_flags & REQ_WRITE) {
bpf_trace_printk("1 W %s %d %d ?\\n", req->rq_disk->disk_name,
req->__sector, delta / 1000);
} else {
bpf_trace_printk("1 R %s %d %d ?\\n", req->rq_disk->disk_name,
req->__sector, delta / 1000);
}
start.delete(&req);
pidbyreq.delete(&req);
commbyreq.delete(&req);
return 0;
}
""")
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_account_io_completion",
fn_name="trace_req_completion")
# header
print("%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s" % ("TIME(s)", "COMM", "PID",
"DISK", "T", "SECTOR", "BYTES", "LAT(ms)"))
start_ts = 0
# format output
while 1:
(task, pid, cpu, flags, ts, msg) = b.trace_fields()
args = msg.split(" ")
if start_ts == 0:
start_ts = ts
if args[0] == "0":
(real_pid, real_comm, bytes_s) = (args[1], args[2], args[3])
continue
else:
(type_s, disk_s, sector_s, us_s) = (args[1], args[2], args[3],
args[4])
ms = float(int(us_s, 10)) / 1000
print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % (
ts - start_ts, real_comm, real_pid, disk_s, type_s, sector_s,
bytes_s, ms))