From 7e0784d37aa66fff936aeebcc042ff5602cae890 Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr Date: Mon, 16 Jan 2017 16:35:58 +0100 Subject: [PATCH] fix iteration over CPUs Since kernel version 4.9.0 BPF stopped working in a KVM guest. The problem are calls to perf_event_open with CPU identifiers which do not exist (ENODEV). The root cause for this is that the current code assumes ascending numbered CPUs. However, this is not always the case (e.g. CPU hotplugging). This patch introduces the get_online_cpus() and get_possible_cpus() helper functions and uses the appropriate function for iterations over CPUs. The BPF_MAP_TYPE_PERF_EVENT_ARRAY map contains now an entry for each possible CPU instead of for each online CPU. Fixes: #893 Signed-off-by: Andreas Gerstmayr --- src/cc/BPF.cc | 10 ++-- src/cc/BPFTable.cc | 5 +- src/cc/CMakeLists.txt | 4 +- src/cc/common.cc | 51 +++++++++++++++++++++ src/cc/common.h | 5 ++ src/cc/frontends/clang/CMakeLists.txt | 2 +- src/cc/frontends/clang/b_frontend_action.cc | 3 +- src/python/bcc/__init__.py | 4 +- src/python/bcc/perf.py | 4 +- src/python/bcc/table.py | 5 +- src/python/bcc/utils.py | 33 +++++++++++++ tests/cc/test_c_api.cc | 8 ++++ tests/python/CMakeLists.txt | 2 + tests/python/test_array.py | 33 +++++++++++++ tests/python/test_utils.py | 18 ++++++++ 15 files changed, 171 insertions(+), 16 deletions(-) create mode 100644 src/cc/common.cc create mode 100644 src/python/bcc/utils.py create mode 100755 tests/python/test_utils.py diff --git a/src/cc/BPF.cc b/src/cc/BPF.cc index 4a7ca2ccea11..292e35177237 100644 --- a/src/cc/BPF.cc +++ b/src/cc/BPF.cc @@ -30,6 +30,7 @@ #include "bpf_module.h" #include "libbpf.h" #include "perf_reader.h" +#include "common.h" #include "usdt.h" #include "BPF.h" @@ -297,11 +298,12 @@ StatusTuple BPF::attach_perf_event(uint32_t ev_type, uint32_t ev_config, TRY2(load_func(probe_func, BPF_PROG_TYPE_PERF_EVENT, probe_fd)); auto fds = new std::map(); - int cpu_st = 0; - int cpu_en = sysconf(_SC_NPROCESSORS_ONLN) - 1; + std::vector cpus; if (cpu >= 0) - cpu_st = cpu_en = cpu; - for (int i = cpu_st; i <= cpu_en; i++) { + cpus.push_back(cpu); + else + cpus = get_online_cpus(); + for (int i: cpus) { int fd = bpf_attach_perf_event(probe_fd, ev_type, ev_config, sample_period, sample_freq, pid, i, group_fd); if (fd < 0) { diff --git a/src/cc/BPFTable.cc b/src/cc/BPFTable.cc index 994e51fb6856..837d5bd0b649 100644 --- a/src/cc/BPFTable.cc +++ b/src/cc/BPFTable.cc @@ -25,6 +25,7 @@ #include "bcc_syms.h" #include "libbpf.h" #include "perf_reader.h" +#include "common.h" namespace ebpf { @@ -89,7 +90,7 @@ StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb, if (cpu_readers_.size() != 0 || readers_.size() != 0) return StatusTuple(-1, "Previously opened perf buffer not cleaned"); - for (int i = 0; i < sysconf(_SC_NPROCESSORS_ONLN); i++) { + for (int i: get_online_cpus()) { auto res = open_on_cpu(cb, i, cb_cookie); if (res.code() != 0) { TRY2(close_all_cpu()); @@ -113,7 +114,7 @@ StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) { StatusTuple BPFPerfBuffer::close_all_cpu() { std::string errors; bool has_error = false; - for (int i = 0; i < sysconf(_SC_NPROCESSORS_ONLN); i++) { + for (int i: get_online_cpus()) { auto res = close_on_cpu(i); if (res.code() != 0) { errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: "; diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt index fed6d3a89b27..2d1fdca54c7c 100644 --- a/src/cc/CMakeLists.txt +++ b/src/cc/CMakeLists.txt @@ -35,12 +35,12 @@ if (CMAKE_COMPILER_IS_GNUCC AND LIBCLANG_ISSTATIC) endif() endif() -add_library(bcc-shared SHARED bpf_common.cc bpf_module.cc libbpf.c perf_reader.c shared_table.cc exported_files.cc bcc_elf.c bcc_perf_map.c bcc_proc.c bcc_syms.cc usdt_args.cc usdt.cc BPF.cc BPFTable.cc) +add_library(bcc-shared SHARED bpf_common.cc bpf_module.cc libbpf.c perf_reader.c shared_table.cc exported_files.cc bcc_elf.c bcc_perf_map.c bcc_proc.c bcc_syms.cc usdt_args.cc usdt.cc common.cc BPF.cc BPFTable.cc) set_target_properties(bcc-shared PROPERTIES VERSION ${REVISION_LAST} SOVERSION 0) set_target_properties(bcc-shared PROPERTIES OUTPUT_NAME bcc) add_library(bcc-loader-static libbpf.c perf_reader.c bcc_elf.c bcc_perf_map.c bcc_proc.c) -add_library(bcc-static STATIC bpf_common.cc bpf_module.cc shared_table.cc exported_files.cc bcc_syms.cc usdt_args.cc usdt.cc BPF.cc BPFTable.cc) +add_library(bcc-static STATIC bpf_common.cc bpf_module.cc shared_table.cc exported_files.cc bcc_syms.cc usdt_args.cc usdt.cc common.cc BPF.cc BPFTable.cc) set_target_properties(bcc-static PROPERTIES OUTPUT_NAME bcc) set(llvm_raw_libs bitwriter bpfcodegen irreader linker diff --git a/src/cc/common.cc b/src/cc/common.cc new file mode 100644 index 000000000000..78bdb862677c --- /dev/null +++ b/src/cc/common.cc @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 Catalysts GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "common.h" + +namespace ebpf { + +std::vector read_cpu_range(std::string path) { + std::ifstream cpus_range_stream { path }; + std::vector cpus; + std::string cpu_range; + + while (std::getline(cpus_range_stream, cpu_range, ',')) { + std::size_t rangeop = cpu_range.find('-'); + if (rangeop == std::string::npos) { + cpus.push_back(std::stoi(cpu_range)); + } + else { + int start = std::stoi(cpu_range.substr(0, rangeop)); + int end = std::stoi(cpu_range.substr(rangeop + 1)); + for (int i = start; i <= end; i++) + cpus.push_back(i); + } + } + return cpus; +} + +std::vector get_online_cpus() { + return read_cpu_range("/sys/devices/system/cpu/online"); +} + +std::vector get_possible_cpus() { + return read_cpu_range("/sys/devices/system/cpu/possible"); +} + + +} // namespace ebpf diff --git a/src/cc/common.h b/src/cc/common.h index b908f25495f9..377ddfdcb298 100644 --- a/src/cc/common.h +++ b/src/cc/common.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace ebpf { @@ -28,4 +29,8 @@ make_unique(Args &&... args) { return std::unique_ptr(new T(std::forward(args)...)); } +std::vector get_online_cpus(); + +std::vector get_possible_cpus(); + } // namespace ebpf diff --git a/src/cc/frontends/clang/CMakeLists.txt b/src/cc/frontends/clang/CMakeLists.txt index f1ad50f2fa73..5f6f6391e942 100644 --- a/src/cc/frontends/clang/CMakeLists.txt +++ b/src/cc/frontends/clang/CMakeLists.txt @@ -2,4 +2,4 @@ # Licensed under the Apache License, Version 2.0 (the "License") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKERNEL_MODULES_DIR='\"${BCC_KERNEL_MODULES_DIR}\"'") -add_library(clang_frontend loader.cc b_frontend_action.cc tp_frontend_action.cc kbuild_helper.cc) +add_library(clang_frontend loader.cc b_frontend_action.cc tp_frontend_action.cc kbuild_helper.cc ../../common.cc) diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc index 397ecc682211..3b633670bf2d 100644 --- a/src/cc/frontends/clang/b_frontend_action.cc +++ b/src/cc/frontends/clang/b_frontend_action.cc @@ -27,6 +27,7 @@ #include "b_frontend_action.h" #include "shared_table.h" +#include "common.h" #include "libbpf.h" @@ -656,7 +657,7 @@ bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) { map_type = BPF_MAP_TYPE_PROG_ARRAY; } else if (A->getName() == "maps/perf_output") { map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY; - int numcpu = sysconf(_SC_NPROCESSORS_ONLN); + int numcpu = get_possible_cpus().size(); if (numcpu <= 0) numcpu = 1; table.max_entries = numcpu; diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py index 0304da180b2e..454fe20d70cf 100644 --- a/src/python/bcc/__init__.py +++ b/src/python/bcc/__init__.py @@ -17,7 +17,6 @@ import ctypes as ct import fcntl import json -import multiprocessing import os import re import struct @@ -29,6 +28,7 @@ from .table import Table from .perf import Perf from .usyms import ProcessSymbols +from .utils import get_online_cpus _kprobe_limit = 1000 _num_open_probes = 0 @@ -660,7 +660,7 @@ def attach_perf_event(self, ev_type=-1, ev_config=-1, fn_name="", res[cpu] = self._attach_perf_event(fn.fd, ev_type, ev_config, sample_period, sample_freq, pid, cpu, group_fd) else: - for i in range(0, multiprocessing.cpu_count()): + for i in get_online_cpus(): res[i] = self._attach_perf_event(fn.fd, ev_type, ev_config, sample_period, sample_freq, pid, i, group_fd) self.open_perf_events[(ev_type, ev_config)] = res diff --git a/src/python/bcc/perf.py b/src/python/bcc/perf.py index ea155915ca4c..44b0128df808 100644 --- a/src/python/bcc/perf.py +++ b/src/python/bcc/perf.py @@ -13,8 +13,8 @@ # limitations under the License. import ctypes as ct -import multiprocessing import os +from .utils import get_online_cpus class Perf(object): class perf_event_attr(ct.Structure): @@ -105,5 +105,5 @@ def perf_event_open(tpoint_id, pid=-1, ptype=PERF_TYPE_TRACEPOINT, attr.sample_period = 1 attr.wakeup_events = 9999999 # don't wake up - for cpu in range(0, multiprocessing.cpu_count()): + for cpu in get_online_cpus(): Perf._open_for_cpu(cpu, attr) diff --git a/src/python/bcc/table.py b/src/python/bcc/table.py index 1a4a32467d49..74358e60ab96 100644 --- a/src/python/bcc/table.py +++ b/src/python/bcc/table.py @@ -19,6 +19,7 @@ from .libbcc import lib, _RAW_CB_TYPE from .perf import Perf +from .utils import get_online_cpus from subprocess import check_output BPF_MAP_TYPE_HASH = 1 @@ -509,7 +510,7 @@ def open_perf_buffer(self, callback): event submitted from the kernel, up to millions per second. """ - for i in range(0, multiprocessing.cpu_count()): + for i in get_online_cpus(): self._open_perf_buffer(i, callback) def _open_perf_buffer(self, cpu, callback): @@ -550,7 +551,7 @@ def open_perf_event(self, ev): if not isinstance(ev, self.Event): raise Exception("argument must be an Event, got %s", type(ev)) - for i in range(0, multiprocessing.cpu_count()): + for i in get_online_cpus(): self._open_perf_event(i, ev.typ, ev.config) diff --git a/src/python/bcc/utils.py b/src/python/bcc/utils.py new file mode 100644 index 000000000000..348ab9d5257a --- /dev/null +++ b/src/python/bcc/utils.py @@ -0,0 +1,33 @@ +# Copyright 2016 Catalysts GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def _read_cpu_range(path): + cpus = [] + with open(path, 'r') as f: + cpus_range_str = f.read() + for cpu_range in cpus_range_str.split(','): + rangeop = cpu_range.find('-') + if rangeop == -1: + cpus.append(int(cpu_range)) + else: + start = int(cpu_range[:rangeop]) + end = int(cpu_range[rangeop+1:]) + cpus.extend(range(start, end+1)) + return cpus + +def get_online_cpus(): + return _read_cpu_range('/sys/devices/system/cpu/online') + +def get_possible_cpus(): + return _read_cpu_range('/sys/devices/system/cpu/possible') diff --git a/tests/cc/test_c_api.cc b/tests/cc/test_c_api.cc index af0bd3acf31a..7a5e8dcffaa0 100644 --- a/tests/cc/test_c_api.cc +++ b/tests/cc/test_c_api.cc @@ -23,6 +23,7 @@ #include "bcc_perf_map.h" #include "bcc_proc.h" #include "bcc_syms.h" +#include "common.h" #include "vendor/tinyformat.hpp" #include "catch.hpp" @@ -196,3 +197,10 @@ TEST_CASE("resolve symbols using /tmp/perf-pid.map", "[c_api]") { munmap(map_addr, map_sz); } + + +TEST_CASE("get online CPUs", "[c_api]") { + std::vector cpus = ebpf::get_online_cpus(); + int num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + REQUIRE(cpus.size() == num_cpus); +} diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt index fb0eedbda343..1da2a673df49 100644 --- a/tests/python/CMakeLists.txt +++ b/tests/python/CMakeLists.txt @@ -56,6 +56,8 @@ add_test(NAME py_test_tracepoint WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_test_tracepoint sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tracepoint.py) add_test(NAME py_test_perf_event WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_test_perf_event sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_perf_event.py) +add_test(NAME py_test_utils WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${TEST_WRAPPER} py_test_utils sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_utils.py) add_test(NAME py_test_dump_func WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${TEST_WRAPPER} py_dump_func simple ${CMAKE_CURRENT_SOURCE_DIR}/test_dump_func.py) diff --git a/tests/python/test_array.py b/tests/python/test_array.py index 3fe1dceaf3b7..a7c82a4f6d70 100755 --- a/tests/python/test_array.py +++ b/tests/python/test_array.py @@ -6,6 +6,8 @@ import ctypes as ct import random import time +import subprocess +from bcc.utils import get_online_cpus from unittest import main, TestCase class TestArray(TestCase): @@ -62,6 +64,37 @@ def cb(cpu, data, size): time.sleep(0.1) b.kprobe_poll() self.assertGreater(self.counter, 0) + b.cleanup() + + def test_perf_buffer_for_each_cpu(self): + self.events = [] + + class Data(ct.Structure): + _fields_ = [("cpu", ct.c_ulonglong)] + + def cb(cpu, data, size): + self.assertGreater(size, ct.sizeof(Data)) + event = ct.cast(data, ct.POINTER(Data)).contents + self.events.append(event) + + text = """ +BPF_PERF_OUTPUT(events); +int kprobe__sys_nanosleep(void *ctx) { + struct { + u64 cpu; + } data = {bpf_get_smp_processor_id()}; + events.perf_submit(ctx, &data, sizeof(data)); + return 0; +} +""" + b = BPF(text=text) + b["events"].open_perf_buffer(cb) + online_cpus = get_online_cpus() + for cpu in online_cpus: + subprocess.call(['taskset', '-c', str(cpu), 'sleep', '0.1']) + b.kprobe_poll() + b.cleanup() + self.assertGreaterEqual(len(self.events), len(online_cpus), 'Received only {}/{} events'.format(len(self.events), len(online_cpus))) if __name__ == "__main__": main() diff --git a/tests/python/test_utils.py b/tests/python/test_utils.py new file mode 100755 index 000000000000..08862e74074d --- /dev/null +++ b/tests/python/test_utils.py @@ -0,0 +1,18 @@ +#!/usr/bin/python +# Copyright (c) Catalysts GmbH +# Licensed under the Apache License, Version 2.0 (the "License") + +from bcc.utils import get_online_cpus +import multiprocessing +import unittest + +class TestUtils(unittest.TestCase): + def test_get_online_cpus(self): + online_cpus = get_online_cpus() + num_cores = multiprocessing.cpu_count() + + self.assertEqual(len(online_cpus), num_cores) + + +if __name__ == "__main__": + unittest.main()