forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
31cd104
commit 6f075b9
Showing
4 changed files
with
489 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
.TH biotop 8 "2016-02-06" "USER COMMANDS" | ||
.SH NAME | ||
biotop \- Block device (disk) I/O by process top. | ||
.SH SYNOPSIS | ||
.B biotop [\-h] [\-C] [\-r MAXROWS] [interval] [count] | ||
.SH DESCRIPTION | ||
This is top for disks. | ||
|
||
This traces block device I/O (disk I/O), and prints a per-process summary every | ||
interval (by default, 1 second). The summary is sorted on the top disk | ||
consumers by throughput (Kbytes). The PID and process name shown are measured | ||
from when the I/O was first created, which usually identifies the responsible | ||
process. | ||
|
||
For efficiency, this uses in-kernel eBPF maps to cache process details (PID and | ||
comm) by I/O request, as well as a starting timestamp for calculating I/O | ||
latency, and the final summary. | ||
|
||
This works by tracing various kernel blk_*() functions using dynamic tracing, | ||
and will need updating to match any changes to these functions. | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH EXAMPLES | ||
.TP | ||
Summarize block device I/O by process, 1 second screen refresh: | ||
# | ||
.B biotop | ||
.TP | ||
Don't clear the screen: | ||
# | ||
.B biotop -C | ||
.TP | ||
5 second summaries, 10 times only: | ||
# | ||
.B biotop 5 10 | ||
.SH FIELDS | ||
.TP | ||
loadavg: | ||
The contents of /proc/loadavg | ||
.TP | ||
PID | ||
Cached process ID, if present. This usually identifies (but isn't guaranteed | ||
to identify) the responsible process for the I/O. | ||
.TP | ||
COMM | ||
Cached process name, if present. This usually identifies (but isn't guaranteed | ||
to identify) the responsible process for the I/O. | ||
.TP | ||
D | ||
Direction: R == read, W == write. | ||
.TP | ||
MAJ | ||
Major device number. | ||
.TP | ||
MIN | ||
Minor device number. | ||
.TP | ||
DISK | ||
Disk device name. | ||
.TP | ||
I/O | ||
Number of I/O during the interval. | ||
.TP | ||
Kbytes | ||
Total Kbytes for these I/O, during the interval. | ||
.TP | ||
AVGms | ||
Average time for the I/O (latency) from the issue to the device, to its | ||
completion, in milliseconds. | ||
.SH OVERHEAD | ||
Since block device I/O usually has a relatively low frequency (< 10,000/s), | ||
the overhead for this tool is expected to be low or negligible. For high IOPS | ||
storage systems, test and quantify before use. | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Brendan Gregg | ||
.SH SEE ALSO | ||
biosnoop(8), biolatency(8), iostat(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
#!/usr/bin/python | ||
# @lint-avoid-python-3-compatibility-imports | ||
# | ||
# biotop block device (disk) I/O by process. | ||
# For Linux, uses BCC, eBPF. | ||
# | ||
# USAGE: biotop.py [-h] [-C] [-r MAXROWS] [interval] [count] | ||
# | ||
# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O | ||
# request, as well as a starting timestamp for calculating I/O latency. | ||
# | ||
# Copyright 2016 Netflix, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 06-Feb-2016 Brendan Gregg Created this. | ||
|
||
from __future__ import print_function | ||
from bcc import BPF | ||
from time import sleep, strftime | ||
import argparse | ||
import signal | ||
from subprocess import call | ||
|
||
# arguments | ||
# Epilog text shown verbatim at the end of --help output.
examples = """examples:
./biotop # block device I/O top, 1 second refresh
./biotop -C # don't clear the screen
./biotop 5 # 5 second summaries
./biotop 5 10 # 5 second summaries, 10 times only
"""
parser = argparse.ArgumentParser(
    description="Block device (disk) I/O by process",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int makes argparse reject non-numeric values with a usage error
# rather than letting a later int() call die with a ValueError traceback.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
args = parser.parse_args()
interval = args.interval    # seconds between summaries
countdown = args.count      # summaries left to print before exiting
maxrows = args.maxrows      # cap on rows printed per summary
clear = not args.noclear    # clear the screen before each summary?

# linux stats
loadavg = "/proc/loadavg"
diskstats = "/proc/diskstats"
|
||
# signal handler | ||
def signal_ignore(signal, frame):
    """Signal handler that takes no action beyond printing a blank line.

    Both parameters exist only to match the two-argument handler signature;
    neither is read.
    """
    print("")
|
||
# load BPF program | ||
# Compile the in-kernel program. It keeps three hash maps:
#   start    - request pointer -> timestamp taken by trace_req_start
#   whobyreq - request pointer -> pid and comm cached by trace_pid_start
#   counts   - (pid, type, major, minor, comm) -> summed bytes, latency (us)
#              and I/O count; read and cleared by the output loop
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

// a value of one map, and a key for another:
struct who_t {
    u32 pid;
    char name[TASK_COMM_LEN];
};

struct info_t {
    u32 pid;
    int type;
    int major;
    int minor;
    char name[TASK_COMM_LEN];
};

struct val_t {
    u64 bytes;
    u64 us;
    u32 io;
};

BPF_HASH(start, struct request *);
BPF_HASH(whobyreq, struct request *, struct who_t);
BPF_HASH(counts, struct info_t, struct val_t);

// cache PID and comm by-req
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
    struct who_t who = {};

    if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
        // upper 32 bits are the tgid (user-space process ID); the lower
        // 32 bits are the kernel pid, i.e. the thread ID
        who.pid = bpf_get_current_pid_tgid() >> 32;
        whobyreq.update(&req, &who);
    }

    return 0;
}

// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
    u64 ts;

    ts = bpf_ktime_get_ns();
    start.update(&req, &ts);

    return 0;
}

// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
    u64 *tsp;

    // fetch timestamp and calculate delta
    tsp = start.lookup(&req);
    if (tsp == 0) {
        return 0;    // missed tracing issue
    }

    struct who_t *whop;
    struct val_t *valp, zero = {};
    u64 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    // setup info_t key
    struct info_t info = {};
    info.major = req->rq_disk->major;
    info.minor = req->rq_disk->first_minor;
    info.type = req->cmd_flags & REQ_WRITE;

    whop = whobyreq.lookup(&req);
    if (whop == 0) {
        // missed pid who, save stats as pid 0
        valp = counts.lookup_or_init(&info, &zero);
    } else {
        info.pid = whop->pid;
        __builtin_memcpy(&info.name, whop->name, sizeof(info.name));
        valp = counts.lookup_or_init(&info, &zero);
    }

    // save stats
    valp->us += delta_us;
    valp->bytes += req->__data_len;
    valp->io++;

    start.delete(&req);
    whobyreq.delete(&req);

    return 0;
}
""", debug=0)
# (kernel function, BPF handler) pairs, attached in the order listed
probes = (
    ("blk_account_io_start", "trace_pid_start"),
    ("blk_start_request", "trace_req_start"),
    ("blk_mq_start_request", "trace_req_start"),
    ("blk_account_io_completion", "trace_req_completion"),
)
for event, fn_name in probes:
    b.attach_kprobe(event=event, fn_name=fn_name)
|
||
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)

# Build a "major,minor" -> disk name map once, from the first three
# whitespace-separated columns of each diskstats line.
disklookup = {}
with open(diskstats) as stats:
    for entry in stats:
        fields = entry.split()
        disklookup[",".join(fields[:2])] = fields[2]
|
||
# output | ||
# Main loop: sleep one interval, print a header and the busiest rows from
# the "counts" map, clear the map, and repeat until the requested number of
# summaries has been printed or the user hits Ctrl-C.
exiting = 0
while 1:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        exiting = 1
        # One last summary is still printed below; trap further Ctrl-C so
        # it is not cut short by an uncaught KeyboardInterrupt.
        signal.signal(signal.SIGINT, signal_ignore)

    # header
    if clear:
        call("clear")
    else:
        print()
    with open(loadavg) as stats:
        print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))
    print("%-6s %-16s %1s %-3s %-3s %-8s %5s %7s %6s" % ("PID", "COMM",
        "D", "MAJ", "MIN", "DISK", "I/O", "Kbytes", "AVGms"))

    # by-PID output, largest byte count first
    counts = b.get_table("counts")
    line = 0
    for k, v in sorted(counts.items(), key=lambda kv: kv[1].bytes,
                       reverse=True):

        # lookup disk
        diskname = disklookup.get(str(k.major) + "," + str(k.minor), "?")

        # An entry can be sampled after it was created but before its I/O
        # count was incremented; avoid dividing by zero in that window.
        avg_ms = (float(v.us) / 1000) / v.io if v.io else 0.0

        # print line; floor division keeps Kbytes an integer even where
        # "/" is true division
        print("%-6d %-16s %1s %-3d %-3d %-8s %5s %7s %6.2f" % (k.pid, k.name,
            "W" if k.type else "R", k.major, k.minor, diskname, v.io,
            v.bytes // 1024, avg_ms))

        line += 1
        if line >= maxrows:
            break
    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()
Oops, something went wrong.