-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[core] Setup cgroup v2 in C++ #49416
base: master
Are you sure you want to change the base?
Changes from 1 commit
a9e8dc6
feb0282
ce77002
995dc0f
f54f2eb
f3a4a9a
341317f
50874cd
3027ed8
12f0cfe
c5e3fd7
b679ba7
7689889
792fa54
128bfdf
d3fdd2a
2dcd1e7
56aeea9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Signed-off-by: dentiny <dentinyhao@gmail.com>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright 2024 The Ray Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <unistd.h> | ||
|
||
#include <cstdint> | ||
#include <string> | ||
#include <string_view> | ||
|
||
namespace ray { | ||
|
||
// Context used to setup cgroupv2 for a task / actor.
struct PhysicalModeExecutionContext {
  // Directory for cgroup, which is applied to application process.
  //
  // This is a non-owning view: the character data it refers to must outlive the context,
  // otherwise the view dangles.
  std::string_view cgroup_directory;

  // UUID to indicate the current task / actor.
  std::string uuid;

  // PID for the process.
  //
  // Defaults to -1, which is never a valid process id, so that a context whose PID was
  // not filled in carries a well-defined sentinel instead of an indeterminate value.
  pid_t pid = -1;

  // Memory-related spec.
  //
  // Unit: bytes. Corresponds to cgroup V2 `memory.max`, which enforces hard cap on max
  // memory consumption. 0 means no limit.
  uint64_t max_memory = 0;
};
|
||
} // namespace ray |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
// Copyright 2024 The Ray Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include "ray/common/cgroup/cgroup_utils.h"

#include <sys/stat.h>

#include <cerrno>
#include <fstream>

#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "ray/util/logging.h"
|
||
namespace ray { | ||
|
||
namespace { | ||
|
||
// Permission bits used when creating a cgroup directory: the owner can read, write and
// search it; group and others get nothing.
//
// The search (execute) bit matters because this mode is passed to `mkdir`: without it a
// non-privileged owner cannot open the control files (e.g. `cgroup.procs`, `memory.max`)
// that live inside the directory.
constexpr int kCgroupFilePerm = 0700;
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not a uuid, and we should not use a name like this visible in linux. We had the idea of getting a default name from cluster ID right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Renamed as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not sure how cluster id is related here? |
||
// Open the cgroup file at [path] in append mode and write [content] into it.
//
// [path] is first copied into an owning string: a `std::string_view` is not guaranteed
// to be null-terminated, so handing `path.data()` to the stream could open a different
// (longer) path than the one the caller asked for.
//
// The stream is flushed and closed when it goes out of scope. Failures to open or write
// are not reported to the caller.
void OpenCgroupFileAndAppend(std::string_view path, std::string content) {
  const std::string owned_path{path};
  std::ofstream out_file{owned_path, std::ios::out | std::ios::app};
  out_file << content;
}
|
||
bool CreateNewCgroup(const PhysicalModeExecutionContext &ctx) { | ||
// Sanity check. | ||
RAY_CHECK(!ctx.uuid.empty()); | ||
RAY_CHECK_NE(ctx.uuid, kDefaultCgroupUuid); | ||
RAY_CHECK_GT(ctx.max_memory, 0); | ||
|
||
const std::string cgroup_folder = | ||
absl::StrFormat("%s/%s", ctx.cgroup_directory, ctx.uuid); | ||
int ret_code = mkdir(cgroup_folder.data(), kCgroupFilePerm); | ||
if (ret_code != 0) { | ||
return false; | ||
} | ||
|
||
if (ctx.max_memory > 0) { | ||
const std::string procs_path = absl::StrFormat("%s/cgroup.procs", cgroup_folder); | ||
OpenCgroupFileAndAppend(procs_path, absl::StrFormat("%d", ctx.pid)); | ||
|
||
const std::string max_memory_path = absl::StrFormat("%s/memory.max", cgroup_folder); | ||
OpenCgroupFileAndAppend(max_memory_path, absl::StrFormat("%d", ctx.max_memory)); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool UpdateDefaultCgroup(const PhysicalModeExecutionContext &ctx) { | ||
// Sanity check. | ||
RAY_CHECK(!ctx.uuid.empty()); | ||
RAY_CHECK_EQ(ctx.uuid, kDefaultCgroupUuid); | ||
RAY_CHECK_EQ(ctx.max_memory, 0); | ||
|
||
const std::string cgroup_folder = | ||
absl::StrFormat("%s/%s", ctx.cgroup_directory, ctx.uuid); | ||
int ret_code = mkdir(cgroup_folder.data(), kCgroupFilePerm); | ||
if (ret_code != 0) { | ||
return false; | ||
} | ||
|
||
const std::string procs_path = absl::StrFormat("%s/cgroup.procs", cgroup_folder); | ||
OpenCgroupFileAndAppend(procs_path, absl::StrFormat("%d", ctx.pid)); | ||
|
||
return true; | ||
} | ||
|
||
bool DeleteCgroup(const PhysicalModeExecutionContext &ctx) { | ||
// Sanity check. | ||
RAY_CHECK(!ctx.uuid.empty()); | ||
RAY_CHECK_NE(ctx.uuid, kDefaultCgroupUuid); | ||
RAY_CHECK_GT(ctx.max_memory, 0); | ||
|
||
const std::string cgroup_folder = | ||
absl::StrFormat("%s/%s", ctx.cgroup_directory, ctx.uuid); | ||
return rmdir(cgroup_folder.data()) == 0; | ||
} | ||
|
||
bool RemoveCtxFromDefaultCgroup(const PhysicalModeExecutionContext &ctx) { | ||
// Sanity check. | ||
RAY_CHECK(!ctx.uuid.empty()); | ||
RAY_CHECK_EQ(ctx.uuid, kDefaultCgroupUuid); | ||
RAY_CHECK_EQ(ctx.max_memory, 0); | ||
|
||
const std::string cgroup_folder = | ||
absl::StrFormat("%s/%s", ctx.cgroup_directory, ctx.uuid); | ||
int ret_code = mkdir(cgroup_folder.data(), kCgroupFilePerm); | ||
if (ret_code != 0) { | ||
return false; | ||
} | ||
|
||
const std::string procs_path = absl::StrFormat("%s/cgroup.procs", cgroup_folder); | ||
std::ostringstream buffer; | ||
{ | ||
std::ifstream file(procs_path.data(), std::ios::in); | ||
buffer << file.rdbuf(); | ||
} | ||
std::string content = buffer.str(); // contains all PIDs, separated by space | ||
|
||
std::vector<std::string_view> old_pid_strings = absl::StrSplit(content, ' '); | ||
rynewang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
std::vector<std::string_view> new_pid_strings; | ||
new_pid_strings.reserve(old_pid_strings.size() - 1); | ||
for (const auto &cur_pid : old_pid_strings) { | ||
if (cur_pid == absl::StrFormat("%d", ctx.pid)) { | ||
continue; | ||
} | ||
new_pid_strings.emplace_back(cur_pid); | ||
} | ||
|
||
const std::string new_pids = absl::StrJoin(new_pid_strings, " "); | ||
{ | ||
std::ofstream out_file{procs_path.data(), std::ios::out}; | ||
out_file << new_pids; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
} // namespace | ||
|
||
bool SetupCgroupForContext(const PhysicalModeExecutionContext &ctx) { | ||
#ifndef __linux__ | ||
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return false; | ||
#endif | ||
|
||
// Create a new cgroup if max memory specified. | ||
if (ctx.max_memory > 0) { | ||
return CreateNewCgroup(ctx); | ||
} | ||
|
||
// Update default cgroup if no max resource specified. | ||
return UpdateDefaultCgroup(ctx); | ||
} | ||
|
||
bool CleanupCgroupForContext(const PhysicalModeExecutionContext &ctx) { | ||
#ifndef __linux__ | ||
return false; | ||
#endif | ||
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Delete the dedicated cgroup if max memory specified. | ||
if (ctx.max_memory > 0) { | ||
return DeleteCgroup(ctx); | ||
} | ||
|
||
// Update default cgroup if no max resource specified. | ||
return RemoveCtxFromDefaultCgroup(ctx); | ||
} | ||
|
||
} // namespace ray |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// Copyright 2024 The Ray Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Util functions to setup cgroup. | ||
|
||
#pragma once | ||
|
||
#include <string_view> | ||
|
||
#include "ray/common/cgroup/cgroup_context.h" | ||
|
||
namespace ray { | ||
|
||
// There are two types of memory cgroup constraints: | ||
// 1. Contexts with a memory limit get a dedicated cgroup created for them; | ||
// 2. Contexts without a limit are added to the shared default cgroup. | ||
// | ||
// This is the `uuid` value that identifies the default cgroup; it is also used as the | ||
// directory name of that cgroup under the context's `cgroup_directory`. | ||
inline constexpr std::string_view kDefaultCgroupUuid = "default_cgroup_uuid"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I choose the word |
||
|
||
// Setup cgroup based on the given [ctx]. Return whether the setup succeeds or not. | ||
// | ||
// If `ctx.max_memory` is greater than 0 a dedicated cgroup is created for the context; | ||
// otherwise the context's process is added to the default cgroup. | ||
// Always returns false on non-Linux platforms. | ||
bool SetupCgroupForContext(const PhysicalModeExecutionContext &ctx); | ||
dentiny marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Cleanup cgroup based on the given [ctx]. Return whether the cleanup succeeds or not. | ||
// | ||
// If `ctx.max_memory` is greater than 0 the context's dedicated cgroup is deleted; | ||
// otherwise the context's process is removed from the default cgroup. | ||
// Always returns false on non-Linux platforms. | ||
bool CleanupCgroupForContext(const PhysicalModeExecutionContext &ctx); | ||
|
||
} // namespace ray |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Q: do we need this separate config class from CgroupV2Setup?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These data fields are necessary to construct and destruct cgroup;
As of now the struct doesn't seem that necessary since it only contains 4 fields and we could directly pass them into the factory function, but we could have much more fields (i.e. cpu-related, resource min / high), better to have a struct.