Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persist query performance stats #8250

Merged
merged 2 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions osquery/config/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,26 +782,27 @@ Status Config::updateSource(const std::string& source,
if (newQueries.find(oldPack.first) == newQueries.end()) {
// This pack was removed. Also remove performance stats.
for (const auto& oldQuery : oldPack.second) {
performance_.erase(getQueryName(oldPack.first, oldQuery.first));
deleteDatabaseValue(kQueryPerformance,
getQueryName(oldPack.first, oldQuery.first));
}
continue;
}
for (const auto& oldQuery : oldPack.second) {
if (newQueries[oldPack.first].find(oldQuery.first) ==
newQueries[oldPack.first].end()) {
// This query was removed. Also remove performance stats.
performance_.erase(getQueryName(oldPack.first, oldQuery.first));
deleteDatabaseValue(kQueryPerformance,
getQueryName(oldPack.first, oldQuery.first));
continue;
}
if (queries[oldPack.first][oldQuery.first] !=
newQueries[oldPack.first][oldQuery.first]) {
// This query was updated. Clear the performance stats.
auto fullName = getQueryName(oldPack.first, oldQuery.first);
RecursiveLock lock(config_performance_mutex_);
if (performance_.count(fullName) != 0) {
LOG(INFO) << "Clearing performance stats for query: " << fullName;
performance_[fullName] = QueryPerformance();
}
LOG(INFO) << "Clearing performance stats for query: " << fullName;
setDatabaseValue(
kQueryPerformance, fullName, QueryPerformance().toCSV());
}
}
}
Expand Down Expand Up @@ -1043,7 +1044,6 @@ void Config::reset() {
setStartTime(getUnixTime());

schedule_ = std::make_unique<Schedule>();
std::map<std::string, QueryPerformance>().swap(performance_);
std::map<std::string, FileCategories>().swap(files_);
std::map<std::string, std::string>().swap(hash_);
valid_ = false;
Expand Down Expand Up @@ -1087,12 +1087,13 @@ void Config::recordQueryPerformance(const std::string& name,
const Row& r0,
const Row& r1) {
RecursiveLock lock(config_performance_mutex_);
if (performance_.count(name) == 0) {
performance_[name] = QueryPerformance();
std::string csv;
QueryPerformance query;
auto status = getDatabaseValue(kQueryPerformance, name, csv);
if (status.ok()) {
query = QueryPerformance(csv);
}

// Grab access to the non-const schedule item.
auto& query = performance_.at(name);
if (!r1.at("user_time").empty() && !r0.at("user_time").empty()) {
auto ut1 = tryTo<long long>(r1.at("user_time"));
auto ut0 = tryTo<long long>(r0.at("user_time"));
Expand Down Expand Up @@ -1132,6 +1133,12 @@ void Config::recordQueryPerformance(const std::string& name,
query.executions += 1;
query.last_executed = getUnixTime();

status = setDatabaseValue(kQueryPerformance, name, query.toCSV());
if (!status.ok()) {
LOG(WARNING) << "Could not write performance stats for query " << name
<< " to the database: " << status.getMessage();
}

/* Clear the executing query only if a resource limit has not been hit.
This is used by the next worker execution to denylist a query
that triggered a watchdog resource limit. */
Expand All @@ -1153,10 +1160,11 @@ void Config::recordQueryStart(const std::string& name) {

void Config::getPerformanceStats(
const std::string& name,
std::function<void(const QueryPerformance& query)> predicate) const {
if (performance_.count(name) > 0) {
RecursiveLock lock(config_performance_mutex_);
predicate(performance_.at(name));
std::function<void(const QueryPerformance& query)> predicate) {
std::string csv;
auto status = getDatabaseValue(kQueryPerformance, name, csv);
if (status.ok()) {
predicate(QueryPerformance(csv));
}
}

Expand Down
19 changes: 8 additions & 11 deletions osquery/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ class Config : private boost::noncopyable {
* @param r0 the process row before the query
* @param r1 the process row after the query
*/
void recordQueryPerformance(const std::string& name,
uint64_t delay_ms,
uint64_t size,
const Row& r0,
const Row& r1);
static void recordQueryPerformance(const std::string& name,
uint64_t delay_ms,
uint64_t size,
const Row& r0,
const Row& r1);

/**
* @brief Record a query 'initialization', meaning the query will run.
Expand Down Expand Up @@ -215,16 +215,16 @@ class Config : private boost::noncopyable {
* QueryPerformance struct, if it exists.
*
* @code{.cpp}
* Config::get().getPerformanceStats(
* Config::getPerformanceStats(
* "my_awesome_query",
* [](const QueryPerformance& query) {
* // use "query" here
* });
* @endcode
*/
void getPerformanceStats(
static void getPerformanceStats(
const std::string& name,
std::function<void(const QueryPerformance& query)> predicate) const;
std::function<void(const QueryPerformance& query)> predicate);

/**
* @brief Helper to access config parsers via the registry
Expand Down Expand Up @@ -331,9 +331,6 @@ class Config : private boost::noncopyable {
/// Schedule of packs and their queries.
std::unique_ptr<Schedule> schedule_;

/// A set of performance stats for each query in the schedule.
std::map<std::string, QueryPerformance> performance_;

/// A set of named categories filled with filesystem globbing paths.
using FileCategories = std::map<std::string, std::vector<std::string>>;
std::map<std::string, FileCategories> files_;
Expand Down
77 changes: 77 additions & 0 deletions osquery/core/sql/query_performance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,80 @@
*/

#include "query_performance.h"
#include "boost/lexical_cast.hpp"
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <string>

namespace osquery {

// Helper function to convert a string to a given type.
template <typename Result>
Result convert(const std::string& source) {
Result result;
if (!boost::conversion::try_lexical_convert<Result>(source, result)) {
return Result();
}
return result;
}

QueryPerformance::QueryPerformance(const std::string& csv) {
std::vector<std::string> parts;
boost::split(parts, csv, boost::is_any_of(","));
// future proofing the size, in case additional fields are added
if (parts.size() < 12) {
return;
}

executions = convert<std::size_t>(parts[0]);
last_executed = convert<std::uint64_t>(parts[1]);
wall_time = convert<std::uint64_t>(parts[2]);
wall_time_ms = convert<std::uint64_t>(parts[3]);
last_wall_time_ms = convert<std::uint64_t>(parts[4]);
user_time = convert<std::uint64_t>(parts[5]);
last_user_time = convert<std::uint64_t>(parts[6]);
system_time = convert<std::uint64_t>(parts[7]);
last_system_time = convert<std::uint64_t>(parts[8]);
average_memory = convert<std::uint64_t>(parts[9]);
last_memory = convert<std::uint64_t>(parts[10]);
output_size = convert<std::uint64_t>(parts[11]);
}

std::string QueryPerformance::toCSV() const {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My first thought, is that CSV feels like a brittle format. But looking at the data, it's a dozen numbers. And CSV doesn't feel wrong for that case. JSON feels like a lot of overhead, and ASN.1 is, well, ASN.1.

Though I'm a little leery of the parsing, but since I think rocksdb is all string values under the hood, I'm not sure there's better

return std::to_string(executions) + "," + std::to_string(last_executed) +
"," + std::to_string(wall_time) + "," + std::to_string(wall_time_ms) +
"," + std::to_string(last_wall_time_ms) + "," +
std::to_string(user_time) + "," + std::to_string(last_user_time) +
"," + std::to_string(system_time) + "," +
std::to_string(last_system_time) + "," +
std::to_string(average_memory) + "," + std::to_string(last_memory) +
"," + std::to_string(output_size);
}
Comment on lines +28 to +59
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC we are storing the performance data in the RocksDB store serialized to a CSV. I see the comment on line 31 about future proofing but I don't quite understand. Should we add more about what a future engineer would need to do to add/remove "columns" from the performance stats?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment refers to the validation check: parts.size() < 12

If the check was: parts.size() != 12, then it would be future proof, because we could not parse data with 13 elements, which might happen in the future.


bool operator==(const QueryPerformance& l, const QueryPerformance& r) {
return std::tie(l.executions,
l.last_executed,
l.wall_time,
l.wall_time_ms,
l.last_wall_time_ms,
l.user_time,
l.last_user_time,
l.system_time,
l.last_system_time,
l.average_memory,
l.last_memory,
l.output_size) == std::tie(r.executions,
r.last_executed,
r.wall_time,
r.wall_time_ms,
r.last_wall_time_ms,
r.user_time,
r.last_user_time,
r.system_time,
r.last_system_time,
r.average_memory,
r.last_memory,
r.output_size);
}

} // namespace osquery
12 changes: 12 additions & 0 deletions osquery/core/sql/query_performance.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <cstddef>
#include <cstdint>
#include <string>

namespace osquery {

Expand Down Expand Up @@ -55,6 +56,17 @@ struct QueryPerformance {

/// Total bytes for the query
std::uint64_t output_size{0};

// Default constructor
QueryPerformance() = default;

// Constructor from a CSV string
explicit QueryPerformance(const std::string& csv);

// Convert this struct to a CSV string
[[nodiscard]] std::string toCSV() const;

friend bool operator==(const QueryPerformance& l, const QueryPerformance& r);
};

} // namespace osquery
1 change: 1 addition & 0 deletions osquery/core/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ endfunction()
function(generateOsqueryCoreTestsMergedtestsTest)
set(source_files
flags_tests.cpp
query_performance_tests.cpp
system_test.cpp
tables_tests.cpp
watcher_tests.cpp
Expand Down
56 changes: 56 additions & 0 deletions osquery/core/tests/query_performance_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright (c) 2014-present, The osquery authors
*
* This source code is licensed as defined by the LICENSE file found in the
* root directory of this source tree.
*
* SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only)
*/

#include "osquery/core/sql/query_performance.h"
#include <gtest/gtest.h>

namespace osquery {

class QueryPerformanceTests : public testing::Test {};

TEST_F(QueryPerformanceTests, test_query_performance) {
// Default case
QueryPerformance defaultStats;
auto emptyStats = QueryPerformance("");
ASSERT_EQ(defaultStats, emptyStats);
ASSERT_EQ("0,0,0,0,0,0,0,0,0,0,0,0", defaultStats.toCSV());

// Normal case
{
QueryPerformance expected;
expected.executions = 1;
expected.last_executed = 2;
expected.wall_time = 3;
expected.wall_time_ms = 4;
expected.last_wall_time_ms = 5;
expected.user_time = 6;
expected.last_user_time = 7;
expected.system_time = 8;
expected.last_system_time = 9;
expected.average_memory = 10;
expected.last_memory = 11;
expected.output_size = 12;
std::string csv = "1,2,3,4,5,6,7,8,9,10,11,12";
auto filledStats = QueryPerformance(csv);
ASSERT_EQ(expected, filledStats);
ASSERT_EQ(csv, expected.toCSV());
ASSERT_EQ(csv, filledStats.toCSV());
}

// Invalid case
{
std::string csv = "1,,bozo,4,5,6,7,8,9,10,11,12";
auto filledStats = QueryPerformance(csv);
ASSERT_EQ(0, filledStats.last_executed);
ASSERT_EQ(0, filledStats.wall_time);
ASSERT_EQ("1,0,0,4,5,6,7,8,9,10,11,12", filledStats.toCSV());
}
}

} // namespace osquery
4 changes: 3 additions & 1 deletion osquery/database/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const std::string kCarves = "carves";
const std::string kLogs = "logs";
const std::string kDistributedQueries = "distributed";
const std::string kDistributedRunningQueries = "distributed_running";
const std::string kQueryPerformance = "query_performance";

const std::string kDbEpochSuffix = "epoch";
const std::string kDbCounterSuffix = "counter";
Expand All @@ -59,7 +60,8 @@ const std::vector<std::string> kDomains = {kPersistentSettings,
kLogs,
kCarves,
kDistributedQueries,
kDistributedRunningQueries};
kDistributedRunningQueries,
kQueryPerformance};

std::atomic<bool> kDBAllowOpen(false);
std::atomic<bool> kDBInitialized(false);
Expand Down
3 changes: 3 additions & 0 deletions osquery/database/database.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ extern const std::string kDistributedQueries;
/// The "domain" where currently running distributed queries are stored.
extern const std::string kDistributedRunningQueries;

/// The "domain" where query performance stats are stored.
extern const std::string kQueryPerformance;

/// The running version of our database schema
const int kDbCurrentVersion = 2;

Expand Down
Loading