Skip to content

Commit

Permalink
Merge pull request #10978 from Tishj/maximum_swap_space
Browse files Browse the repository at this point in the history
Add setting to control the maximum swap space
  • Loading branch information
Mytherin authored Apr 17, 2024
2 parents 1601d94 + c93ab90 commit c07b645
Show file tree
Hide file tree
Showing 22 changed files with 765 additions and 90 deletions.
32 changes: 32 additions & 0 deletions src/common/file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "duckdb/main/database.hpp"
#include "duckdb/main/extension_helper.hpp"
#include "duckdb/common/windows_util.hpp"
#include "duckdb/common/operator/multiply.hpp"

#include <cstdint>
#include <cstdio>
Expand All @@ -21,6 +22,7 @@
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <unistd.h>

Expand Down Expand Up @@ -133,6 +135,24 @@ optional_idx FileSystem::GetAvailableMemory() {
return max_memory;
}

//! Returns the number of bytes of disk space that can be used at 'path'.
//! Returns an unset optional_idx if the file system cannot be queried or if the
//! byte count cannot be computed.
optional_idx FileSystem::GetAvailableDiskSpace(const string &path) {
	struct statvfs vfs;

	auto ret = statvfs(path.c_str(), &vfs);
	if (ret == -1) {
		// statvfs failed, the available space is unknown
		return optional_idx();
	}
	// f_frsize is the fundamental block size, the unit in which the block counts below are expressed
	auto block_size = vfs.f_frsize;
	// These are the blocks available for creating new files or extending existing ones.
	// f_bavail is used instead of f_bfree: f_bfree also counts the blocks that are reserved for
	// privileged processes, which an unprivileged process can not actually write to.
	auto available_blocks = vfs.f_bavail;
	idx_t available_disk_space = DConstants::INVALID_INDEX;
	// The multiplication is checked; if it does not succeed (presumably on overflow) we report "unknown"
	if (!TryMultiplyOperator::Operation(static_cast<idx_t>(block_size), static_cast<idx_t>(available_blocks),
	                                    available_disk_space)) {
		return optional_idx();
	}
	return available_disk_space;
}

string FileSystem::GetWorkingDirectory() {
auto buffer = make_unsafe_uniq_array<char>(PATH_MAX);
char *ret = getcwd(buffer.get(), PATH_MAX);
Expand Down Expand Up @@ -233,6 +253,18 @@ optional_idx FileSystem::GetAvailableMemory() {
return optional_idx();
}

//! Windows implementation: returns the number of bytes available at 'path',
//! or an unset optional_idx if the query fails.
optional_idx FileSystem::GetAvailableDiskSpace(const string &path) {
	// The wide-character Win32 API is used, so the UTF-8 path is converted first
	auto wide_path = WindowsUtil::UTF8ToUnicode(path.c_str());

	ULARGE_INTEGER caller_available, volume_total, volume_free;
	auto succeeded = GetDiskFreeSpaceExW(wide_path.c_str(), &caller_available, &volume_total, &volume_free);
	// Only the first output (bytes available to the caller) is reported; the others are unused
	(void)volume_total;
	(void)volume_free;
	if (succeeded) {
		return NumericCast<idx_t>(caller_available.QuadPart);
	}
	return optional_idx();
}

string FileSystem::GetWorkingDirectory() {
idx_t count = GetCurrentDirectoryW(0, nullptr);
if (count == 0) {
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb/common/file_system.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "duckdb/common/vector.hpp"
#include "duckdb/common/enums/file_glob_options.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/optional_idx.hpp"
#include "duckdb/common/error_data.hpp"
#include "duckdb/common/file_open_flags.hpp"
#include <functional>
Expand Down Expand Up @@ -172,6 +173,8 @@ class FileSystem {
DUCKDB_API virtual string ExpandPath(const string &path);
//! Returns the system-available memory in bytes. Returns an unset optional_idx if the system function fails.
DUCKDB_API static optional_idx GetAvailableMemory();
//! Returns the space available on the disk (in bytes). Returns an unset optional_idx if the information was not available.
DUCKDB_API static optional_idx GetAvailableDiskSpace(const string &path);
//! Path separator for path
DUCKDB_API virtual string PathSeparator(const string &path);
//! Checks if path starts with a separator (i.e., '/' on UNIX, '\\' on Windows)
Expand Down
10 changes: 7 additions & 3 deletions src/include/duckdb/main/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ struct DBConfigOptions {
#endif
//! Override for the default extension repository
string custom_extension_repo = "";
//! Override for the default autoload extensoin repository
//! Override for the default autoload extension repository
string autoinstall_extension_repo = "";
//! The maximum memory used by the database system (in bytes). Default: 80% of System available memory
idx_t maximum_memory = (idx_t)-1;
idx_t maximum_memory = DConstants::INVALID_INDEX;
//! The maximum size of the 'temp_directory' folder when set (in bytes). Default: 90% of available disk space.
idx_t maximum_swap_space = DConstants::INVALID_INDEX;
//! The maximum amount of CPU threads used by the database system. Default: all available.
idx_t maximum_threads = (idx_t)-1;
idx_t maximum_threads = DConstants::INVALID_INDEX;
//! The number of external threads that work on DuckDB tasks. Default: 1.
//! Must be smaller or equal to maximum_threads.
idx_t external_threads = 1;
Expand Down Expand Up @@ -254,6 +256,7 @@ struct DBConfig {
DUCKDB_API static vector<ConfigurationOption> GetOptions();
DUCKDB_API static idx_t GetOptionCount();
DUCKDB_API static vector<string> GetOptionNames();
DUCKDB_API static bool IsInMemoryDatabase(const char *database_path);

DUCKDB_API void AddExtensionOption(const string &name, string description, LogicalType parameter,
const Value &default_value = Value(), set_option_callback_t function = nullptr);
Expand Down Expand Up @@ -285,6 +288,7 @@ struct DBConfig {
DUCKDB_API IndexTypeSet &GetIndexTypes();
static idx_t GetSystemMaxThreads(FileSystem &fs);
void SetDefaultMaxMemory();
void SetDefaultTempDirectory();

OrderType ResolveOrder(OrderType order_type) const;
OrderByNullType ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const;
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/main/database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class DatabaseInstance : public std::enable_shared_from_this<DatabaseInstance> {
void Initialize(const char *path, DBConfig *config);
void CreateMainDatabase();

void Configure(DBConfig &config);
void Configure(DBConfig &config, const char *path);

private:
shared_ptr<BufferManager> buffer_manager;
Expand Down
10 changes: 10 additions & 0 deletions src/include/duckdb/main/settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,16 @@ struct MaximumMemorySetting {
static Value GetSetting(const ClientContext &context);
};

//! Setting descriptor for 'max_temp_directory_size': a limit on the amount of data that may be
//! stored inside the 'temp_directory'.
struct MaximumTempDirectorySize {
//! The name under which the setting is exposed
static constexpr const char *Name = "max_temp_directory_size";
//! Human-readable description of the setting
static constexpr const char *Description =
"The maximum amount of data stored inside the 'temp_directory' (when set) (e.g. 1GB)";
//! The value is supplied as a VARCHAR (the description gives '1GB' as an example)
static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
//! Applies 'parameter' as the new value of the setting
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
//! Resets the setting (presumably back to its default - see the implementation)
static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
//! Returns the current value of the setting
static Value GetSetting(const ClientContext &context);
};

struct OldImplicitCasting {
static constexpr const char *Name = "old_implicit_casting";
static constexpr const char *Description = "Allow implicit casting to/from VARCHAR";
Expand Down
18 changes: 15 additions & 3 deletions src/include/duckdb/storage/buffer_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,39 @@ class BufferManager {
virtual void ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) = 0;
virtual BufferHandle Pin(shared_ptr<BlockHandle> &handle) = 0;
virtual void Unpin(shared_ptr<BlockHandle> &handle) = 0;

//! Returns the currently allocated memory
virtual idx_t GetUsedMemory() const = 0;
//! Returns the maximum available memory
virtual idx_t GetMaxMemory() const = 0;
//! Returns the currently used swap space
virtual idx_t GetUsedSwap() = 0;
//! Returns the maximum swap space that can be used
virtual optional_idx GetMaxSwap() const = 0;

//! Returns a new block of memory that is smaller than Storage::BLOCK_SIZE
virtual shared_ptr<BlockHandle> RegisterSmallMemory(idx_t block_size);
virtual DUCKDB_API Allocator &GetBufferAllocator();
virtual DUCKDB_API void ReserveMemory(idx_t size);
virtual DUCKDB_API void FreeReservedMemory(idx_t size);
virtual vector<MemoryInformation> GetMemoryUsageInfo() const = 0;
//! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
//! blocks can be evicted
virtual void SetLimit(idx_t limit = (idx_t)-1);
virtual void SetMemoryLimit(idx_t limit = (idx_t)-1);
virtual void SetSwapLimit(optional_idx limit = optional_idx());

virtual vector<TemporaryFileInformation> GetTemporaryFiles();
virtual const string &GetTemporaryDirectory() const;
virtual void SetTemporaryDirectory(const string &new_dir);
virtual bool HasTemporaryDirectory() const;

//! Construct a managed buffer.
virtual unique_ptr<FileBuffer> ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
FileBufferType type = FileBufferType::MANAGED_BUFFER);
//! Get the underlying buffer pool responsible for managing the buffers
virtual BufferPool &GetBufferPool() const;
//! Get the manager that assigns reservations for temporary memory, e.g., for query intermediates
virtual TemporaryMemoryManager &GetTemporaryMemoryManager();

virtual DatabaseInstance &GetDatabase() = 0;
// Static methods
DUCKDB_API static BufferManager &GetBufferManager(DatabaseInstance &db);
DUCKDB_API static const BufferManager &GetBufferManager(const DatabaseInstance &db);
Expand All @@ -77,6 +86,9 @@ class BufferManager {
//! Returns the maximum available memory for a given query
idx_t GetQueryMaxMemory() const;

//! Get the manager that assigns reservations for temporary memory, e.g., for query intermediates
virtual TemporaryMemoryManager &GetTemporaryMemoryManager();

protected:
virtual void PurgeQueue() = 0;
virtual void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
Expand Down
31 changes: 22 additions & 9 deletions src/include/duckdb/storage/standard_buffer_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class StandardBufferManager : public BufferManager {

idx_t GetUsedMemory() const final;
idx_t GetMaxMemory() const final;
idx_t GetUsedSwap() final;
optional_idx GetMaxSwap() const final;

//! Allocate an in-memory buffer with a single pin.
//! The allocated memory is released when the buffer handle is destroyed.
Expand All @@ -64,7 +66,8 @@ class StandardBufferManager : public BufferManager {

//! Set a new memory limit to the buffer manager, throws an exception if the new limit is too low and not enough
//! blocks can be evicted
void SetLimit(idx_t limit = (idx_t)-1) final;
void SetMemoryLimit(idx_t limit = (idx_t)-1) final;
void SetSwapLimit(optional_idx limit = optional_idx()) final;

//! Returns informaton about memory usage
vector<MemoryInformation> GetMemoryUsageInfo() const override;
Expand All @@ -73,14 +76,14 @@ class StandardBufferManager : public BufferManager {
vector<TemporaryFileInformation> GetTemporaryFiles() final;

const string &GetTemporaryDirectory() const final {
return temp_directory;
return temporary_directory.path;
}

void SetTemporaryDirectory(const string &new_dir) final;

DUCKDB_API Allocator &GetBufferAllocator() final;

DatabaseInstance &GetDatabase() {
DatabaseInstance &GetDatabase() override {
return db;
}

Expand Down Expand Up @@ -136,17 +139,27 @@ class StandardBufferManager : public BufferManager {
//! overwrites the data within with garbage. Any readers that do not hold the pin will notice
void VerifyZeroReaders(shared_ptr<BlockHandle> &handle);

protected:
// Groups everything the buffer manager tracks about the temporary directory.
// These are stored here because temp_directory creation is lazy
// so we need to store information related to the temporary directory before it's created
struct TemporaryFileData {
//! The directory name where temporary files are stored
string path;
//! Lock for creating the temp handle (marked mutable so 'GetMaxSwap' can be const)
mutable mutex lock;
//! Handle for the temporary directory (owned here; creation is lazy, see the note above)
unique_ptr<TemporaryDirectoryHandle> handle;
//! The maximum swap space that can be used (default-constructed, i.e. no limit recorded yet)
optional_idx maximum_swap_space = optional_idx();
};

protected:
//! The database instance
DatabaseInstance &db;
//! The buffer pool
BufferPool &buffer_pool;
//! The directory name where temporary files are stored
string temp_directory;
//! Lock for creating the temp handle
mutex temp_handle_lock;
//! Handle for the temporary directory
unique_ptr<TemporaryDirectoryHandle> temp_directory_handle;
//! The variables related to temporary file management
TemporaryFileData temporary_directory;
//! The temporary id used for managed buffers
atomic<block_id_t> temporary_id;
//! Allocator associated with the buffer manager, that passes all allocations through this buffer manager
Expand Down
24 changes: 20 additions & 4 deletions src/include/duckdb/storage/temporary_file_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ namespace duckdb {
// BlockIndexManager
//===--------------------------------------------------------------------===//

class TemporaryFileManager;

struct BlockIndexManager {
public:
explicit BlockIndexManager(TemporaryFileManager &manager);
BlockIndexManager();

public:
Expand All @@ -37,12 +40,14 @@ struct BlockIndexManager {
bool HasFreeBlocks();

private:
void SetMaxIndex(idx_t blocks);
idx_t GetNewBlockIndexInternal();

private:
idx_t max_index;
set<idx_t> free_indexes;
set<idx_t> indexes_in_use;
optional_ptr<TemporaryFileManager> manager;
};

//===--------------------------------------------------------------------===//
Expand All @@ -69,7 +74,8 @@ class TemporaryFileHandle {
constexpr static idx_t MAX_ALLOWED_INDEX_BASE = 4000;

public:
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index);
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index,
TemporaryFileManager &manager);

public:
struct TemporaryFileLock {
Expand Down Expand Up @@ -103,15 +109,13 @@ class TemporaryFileHandle {
BlockIndexManager index_manager;
};

class TemporaryFileManager;

//===--------------------------------------------------------------------===//
// TemporaryDirectoryHandle
//===--------------------------------------------------------------------===//

class TemporaryDirectoryHandle {
public:
TemporaryDirectoryHandle(DatabaseInstance &db, string path_p);
TemporaryDirectoryHandle(DatabaseInstance &db, string path_p, optional_idx max_swap_space);
~TemporaryDirectoryHandle();

TemporaryFileManager &GetTempFile();
Expand All @@ -130,6 +134,7 @@ class TemporaryDirectoryHandle {
class TemporaryFileManager {
public:
TemporaryFileManager(DatabaseInstance &db, const string &temp_directory_p);
~TemporaryFileManager();

public:
struct TemporaryManagerLock {
Expand All @@ -145,6 +150,13 @@ class TemporaryFileManager {
unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, unique_ptr<FileBuffer> reusable_buffer);
void DeleteTemporaryBuffer(block_id_t id);
vector<TemporaryFileInformation> GetTemporaryFiles();
idx_t GetTotalUsedSpaceInBytes();
optional_idx GetMaxSwapSpace() const;
void SetMaxSwapSpace(optional_idx limit);
//! Register temporary file size growth
void IncreaseSizeOnDisk(idx_t amount);
//! Register temporary file size decrease
void DecreaseSizeOnDisk(idx_t amount);

private:
void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle,
Expand All @@ -164,6 +176,10 @@ class TemporaryFileManager {
unordered_map<block_id_t, TemporaryFileIndex> used_blocks;
//! Manager of in-use temporary file indexes
BlockIndexManager index_manager;
//! The size in bytes of the temporary files that are currently alive
atomic<idx_t> size_on_disk;
//! The max amount of disk space that can be used
idx_t max_swap_space;
};

} // namespace duckdb
25 changes: 25 additions & 0 deletions src/main/config.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "duckdb/main/config.hpp"

#include "duckdb/common/operator/multiply.hpp"
#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/main/settings.hpp"
Expand Down Expand Up @@ -96,6 +97,7 @@ static const ConfigurationOption internal_options[] = {
DUCKDB_LOCAL(IntegerDivisionSetting),
DUCKDB_LOCAL(MaximumExpressionDepthSetting),
DUCKDB_GLOBAL(MaximumMemorySetting),
DUCKDB_GLOBAL(MaximumTempDirectorySize),
DUCKDB_GLOBAL(OldImplicitCasting),
DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting),
DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting),
Expand Down Expand Up @@ -247,6 +249,21 @@ void DBConfig::AddExtensionOption(const string &name, string description, Logica
}
}

//! Returns true if 'database_path' denotes an in-memory database:
//! a null pointer, an empty string, or the literal ":memory:".
bool DBConfig::IsInMemoryDatabase(const char *database_path) {
	// No path at all (nullptr) or an empty string: nothing on disk to open
	if (database_path == nullptr || database_path[0] == '\0') {
		return true;
	}
	// Otherwise only the explicit ":memory:" marker selects an in-memory database
	return strcmp(database_path, ":memory:") == 0;
}

CastFunctionSet &DBConfig::GetCastFunctions() {
return *cast_functions;
}
Expand All @@ -262,6 +279,14 @@ void DBConfig::SetDefaultMaxMemory() {
}
}

//! Derives the default temporary directory from the configured database path.
void DBConfig::SetDefaultTempDirectory() {
	const auto &db_path = options.database_path;
	if (!DBConfig::IsInMemoryDatabase(db_path.c_str())) {
		// Database file on disk: place the temporary directory next to it, as "<database path>.tmp"
		options.temporary_directory = db_path + ".tmp";
		return;
	}
	// In-memory database: there is no database file to derive a name from
	options.temporary_directory = ".tmp";
}

void DBConfig::CheckLock(const string &name) {
if (!options.lock_configuration) {
// not locked
Expand Down
Loading

0 comments on commit c07b645

Please sign in to comment.