Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve bitpacking skip performance + better testing of FetchRow #10295

Merged
merged 10 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/Main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ jobs:
BUILD_JSON: 1
BUILD_EXCEL: 1
BUILD_JEMALLOC: 1
RUN_SLOW_VERIFIERS: 1

steps:
- uses: actions/checkout@v3
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/NightlyTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ jobs:
BUILD_JEMALLOC: 1
DISABLE_SANITIZER: 1
CRASH_ON_ASSERT: 1
RUN_SLOW_VERIFIERS: 1

steps:
- uses: actions/checkout@v3
Expand Down
12 changes: 8 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@
endif()

option(EXPLICIT_EXCEPTIONS "Explicitly enable C++ exceptions." FALSE)
if(${EXPLICIT_EXCEPTIONS})

Check warning on line 206 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Windows Extensions (64-bit)

TIFF support is not enabled and will result in the inability to read some
set(CXX_EXTRA "${CXX_EXTRA} -fexceptions")
endif()

Expand Down Expand Up @@ -320,6 +320,7 @@
option(DISABLE_MEMORY_SAFETY "Debug setting: disable memory access checks at runtime" FALSE)
option(DISABLE_ASSERTIONS "Debug setting: disable assertions" FALSE)
option(ALTERNATIVE_VERIFY "Debug setting: use alternative verify mode" FALSE)
option(RUN_SLOW_VERIFIERS "Debug setting: enable a more extensive set of verifiers" FALSE)
option(DESTROY_UNPINNED_BLOCKS "Debug setting: destroy unpinned buffer-managed blocks" FALSE)
option(FORCE_ASYNC_SINK_SOURCE "Debug setting: forces sinks/sources to block the first 2 times they're called" FALSE)
option(DEBUG_STACKTRACE "Debug setting: print a stracktrace on asserts and when testing crashes" FALSE)
Expand Down Expand Up @@ -385,6 +386,10 @@
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_DEBUG_ASYNC_SINK_SOURCE")
endif()

if(RUN_SLOW_VERIFIERS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_RUN_SLOW_VERIFIERS")
endif()

if(ALTERNATIVE_VERIFY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDUCKDB_ALTERNATIVE_VERIFY")
endif()
Expand Down Expand Up @@ -439,7 +444,7 @@
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CXX_EXTRA_DEBUG}")
else()
message(WARNING "Please use a recent compiler for debug builds")

Check warning on line 447 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Linux Extensions (x64)

Please use a recent compiler for debug builds

Check warning on line 447 in CMakeLists.txt

View workflow job for this annotation

GitHub Actions / Linux Extensions (x64)

Please use a recent compiler for debug builds
endif()
else()
set(CMAKE_CXX_WINDOWS_FLAGS
Expand Down Expand Up @@ -1036,6 +1041,8 @@
FILE(APPEND ${CMAKE_BINARY_DIR}/extensions.txt "${EXT_NAME}\r")
endif()
endforeach()

return()
endif()

if(BUILD_PYTHON)
Expand Down Expand Up @@ -1112,7 +1119,6 @@
message(STATUS "Extensions will be deployed to: ${LOCAL_EXTENSION_REPO}")
endif()

if (NOT EXTENSION_CONFIG_BUILD )
# Write the export set for build and install tree
install(EXPORT "${DUCKDB_EXPORT_SET}" DESTINATION "${INSTALL_CMAKE_DIR}")
export(EXPORT "${DUCKDB_EXPORT_SET}"
Expand Down Expand Up @@ -1146,6 +1152,4 @@
FILES "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/DuckDBConfig.cmake"
"${PROJECT_BINARY_DIR}/DuckDBConfigVersion.cmake"
DESTINATION "${INSTALL_CMAKE_DIR}")
endif()

endif()
endif()
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ endif
ifeq (${FORCE_ASYNC_SINK_SOURCE}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DFORCE_ASYNC_SINK_SOURCE=1
endif
ifeq (${RUN_SLOW_VERIFIERS}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DRUN_SLOW_VERIFIERS=1
endif
ifeq (${ALTERNATIVE_VERIFY}, 1)
CMAKE_VARS:=${CMAKE_VARS} -DALTERNATIVE_VERIFY=1
endif
Expand Down
5 changes: 5 additions & 0 deletions src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6465,6 +6465,8 @@ const char* EnumUtil::ToChars<VerificationType>(VerificationType value) {
return "PREPARED";
case VerificationType::EXTERNAL:
return "EXTERNAL";
case VerificationType::FETCH_ROW_AS_SCAN:
return "FETCH_ROW_AS_SCAN";
case VerificationType::INVALID:
return "INVALID";
default:
Expand Down Expand Up @@ -6498,6 +6500,9 @@ VerificationType EnumUtil::FromString<VerificationType>(const char *value) {
if (StringUtil::Equals(value, "EXTERNAL")) {
return VerificationType::EXTERNAL;
}
if (StringUtil::Equals(value, "FETCH_ROW_AS_SCAN")) {
return VerificationType::FETCH_ROW_AS_SCAN;
}
if (StringUtil::Equals(value, "INVALID")) {
return VerificationType::INVALID;
}
Expand Down
11 changes: 11 additions & 0 deletions src/function/pragma/pragma_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ static void PragmaDisableExternalVerification(ClientContext &context, const Func
ClientConfig::GetConfig(context).verify_external = false;
}

static void PragmaEnableFetchRowVerification(ClientContext &context, const FunctionParameters &parameters) {
ClientConfig::GetConfig(context).verify_fetch_row = true;
}

static void PragmaDisableFetchRowVerification(ClientContext &context, const FunctionParameters &parameters) {
ClientConfig::GetConfig(context).verify_fetch_row = false;
}

static void PragmaEnableForceParallelism(ClientContext &context, const FunctionParameters &parameters) {
ClientConfig::GetConfig(context).verify_parallelism = true;
}
Expand Down Expand Up @@ -125,6 +133,9 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) {
set.AddFunction(PragmaFunction::PragmaStatement("verify_external", PragmaEnableExternalVerification));
set.AddFunction(PragmaFunction::PragmaStatement("disable_verify_external", PragmaDisableExternalVerification));

set.AddFunction(PragmaFunction::PragmaStatement("verify_fetch_row", PragmaEnableFetchRowVerification));
set.AddFunction(PragmaFunction::PragmaStatement("disable_verify_fetch_row", PragmaDisableFetchRowVerification));

set.AddFunction(PragmaFunction::PragmaStatement("verify_serializer", PragmaVerifySerializer));
set.AddFunction(PragmaFunction::PragmaStatement("disable_verify_serializer", PragmaDisableVerifySerializer));

Expand Down
8 changes: 8 additions & 0 deletions src/function/table/table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "duckdb/storage/table/scan_state.hpp"
#include "duckdb/transaction/duck_transaction.hpp"
#include "duckdb/transaction/local_storage.hpp"
#include "duckdb/main/client_data.hpp"

namespace duckdb {

Expand Down Expand Up @@ -78,6 +79,9 @@ static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &
auto &tsgs = gstate->Cast<TableScanGlobalState>();
result->all_columns.Initialize(context.client, tsgs.scanned_types);
}

result->scan_state.options.force_fetch_row = ClientConfig::GetConfig(context.client).force_fetch_row;

return std::move(result);
}

Expand Down Expand Up @@ -118,6 +122,8 @@ static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, Da
auto &state = data_p.local_state->Cast<TableScanLocalState>();
auto &transaction = DuckTransaction::Get(context, bind_data.table.catalog);
auto &storage = bind_data.table.GetStorage();

state.scan_state.options.force_fetch_row = ClientConfig::GetConfig(context).force_fetch_row;
do {
if (bind_data.is_create_index) {
storage.CreateIndexScan(state.scan_state, output,
Expand Down Expand Up @@ -223,6 +229,8 @@ static unique_ptr<GlobalTableFunctionState> IndexScanInitGlobal(ClientContext &c
auto result = make_uniq<IndexScanGlobalState>(row_id_data);
auto &local_storage = LocalStorage::Get(context, bind_data.table.catalog);

result->local_storage_state.options.force_fetch_row = ClientConfig::GetConfig(context).force_fetch_row;

result->column_ids.reserve(input.column_ids.size());
for (auto &id : input.column_ids) {
result->column_ids.push_back(GetStorageIndex(bind_data.table, id));
Expand Down
6 changes: 5 additions & 1 deletion src/include/duckdb/main/client_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ struct ClientConfig {
bool query_verification_enabled = false;
//! Whether or not verification of external operators is enabled, used for testing
bool verify_external = false;
//! Whether or not verification of fetch row code is enabled, used for testing
bool verify_fetch_row = false;
//! Whether or not we should verify the serializer
bool verify_serializer = false;
//! Enable the running of optimizers
Expand All @@ -73,6 +75,8 @@ struct ClientConfig {
bool force_no_cross_product = false;
//! Force use of IEJoin to implement AsOfJoin, used for testing
bool force_asof_iejoin = false;
//! Force use of fetch row instead of scan, used for testing
bool force_fetch_row = false;
//! Use range joins for inequalities, even if there are equality predicates
bool prefer_range_joins = false;
//! If this context should also try to use the available replacement scans
Expand Down Expand Up @@ -116,7 +120,7 @@ struct ClientConfig {
static const ClientConfig &GetConfig(const ClientContext &context);

bool AnyVerification() {
return query_verification_enabled || verify_external || verify_serializer;
return query_verification_enabled || verify_external || verify_serializer || verify_fetch_row;
}
};

Expand Down
1 change: 1 addition & 0 deletions src/include/duckdb/storage/table/column_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class RowGroupWriter;
class TableDataWriter;
class TableStorageInfo;
struct TransactionData;
struct TableScanOptions;

struct DataTableInfo;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb/storage/table/column_checkpoint_state.hpp"

namespace duckdb {
struct TableScanOptions;

class ColumnDataCheckpointer {
public:
Expand Down
13 changes: 12 additions & 1 deletion src/include/duckdb/storage/table/scan_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class TableFilterSet;
class ColumnData;
class DuckTransaction;
class RowGroupSegmentTree;
struct TableScanOptions;

struct SegmentScanState {
virtual ~SegmentScanState() {
Expand Down Expand Up @@ -92,9 +93,11 @@ struct ColumnScanState {
vector<unique_ptr<SegmentScanState>> previous_states;
//! The last read offset in the child state (used for LIST columns only)
idx_t last_offset = 0;
//! Contains TableScan level config for scanning
optional_ptr<TableScanOptions> scan_options;

public:
void Initialize(const LogicalType &type);
void Initialize(const LogicalType &type, optional_ptr<TableScanOptions> options);
//! Move the scan state forward by "count" rows (including all child states)
void Next(idx_t count);
//! Move ONLY this state forward by "count" rows (i.e. not the child states)
Expand Down Expand Up @@ -134,6 +137,7 @@ class CollectionScanState {
const vector<storage_t> &GetColumnIds();
TableFilterSet *GetFilters();
AdaptiveFilter *GetAdaptiveFilter();
TableScanOptions &GetOptions();
bool Scan(DuckTransaction &transaction, DataChunk &result);
bool ScanCommitted(DataChunk &result, TableScanType type);
bool ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type);
Expand All @@ -142,6 +146,11 @@ class CollectionScanState {
TableScanState &parent;
};

struct TableScanOptions {
//! Test config that forces fetching rows one by one instead of regular scans
bool force_fetch_row = false;
};

class TableScanState {
public:
TableScanState() : table_state(*this), local_state(*this), table_filters(nullptr) {};
Expand All @@ -150,6 +159,8 @@ class TableScanState {
CollectionScanState table_state;
//! Transaction-local scan state
CollectionScanState local_state;
//! Options for scanning
TableScanOptions options;

public:
void Initialize(vector<storage_t> column_ids, TableFilterSet *table_filters = nullptr);
Expand Down
25 changes: 25 additions & 0 deletions src/include/duckdb/verification/fetch_row_verifier.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/verification/unoptimized_statement_verifier.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/verification/statement_verifier.hpp"

namespace duckdb {

class FetchRowVerifier : public StatementVerifier {
public:
explicit FetchRowVerifier(unique_ptr<SQLStatement> statement_p);
static unique_ptr<StatementVerifier> Create(const SQLStatement &statement_p);

bool ForceFetchRow() const override {
return true;
}
};

} // namespace duckdb
5 changes: 5 additions & 0 deletions src/include/duckdb/verification/statement_verifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ enum class VerificationType : uint8_t {
NO_OPERATOR_CACHING,
PREPARED,
EXTERNAL,
FETCH_ROW_AS_SCAN,

INVALID
};
Expand Down Expand Up @@ -67,6 +68,10 @@ class StatementVerifier {
virtual bool ForceExternal() const {
return false;
}

virtual bool ForceFetchRow() const {
return false;
}
};

} // namespace duckdb
22 changes: 20 additions & 2 deletions src/main/client_verify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
D_ASSERT(statement->type == StatementType::SELECT_STATEMENT);
// Aggressive query verification

#ifdef DUCKDB_RUN_SLOW_VERIFIERS
bool run_slow_verifiers = true;
#else
bool run_slow_verifiers = false;
#endif

// The purpose of this function is to test correctness of otherwise hard to test features:
// Copy() of statements and expressions
// Serialize()/Deserialize() of expressions
Expand All @@ -37,16 +43,28 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
const auto &stmt = *statement;
vector<unique_ptr<StatementVerifier>> statement_verifiers;
unique_ptr<StatementVerifier> prepared_statement_verifier;

// Base Statement verifiers: these are the verifiers we enable for regular builds
if (config.query_verification_enabled) {
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::COPIED, stmt));
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED, stmt));
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::UNOPTIMIZED, stmt));
prepared_statement_verifier = StatementVerifier::Create(VerificationType::PREPARED, stmt);
}

// This verifier is enabled explicitly OR by enabling run_slow_verifiers
if (config.verify_fetch_row || (run_slow_verifiers && config.query_verification_enabled)) {
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::FETCH_ROW_AS_SCAN, stmt));
}

// For the DEBUG_ASYNC build we enable this extra verifier
#ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
// This verification is quite slow, so we only run it for the async sink/source debug mode
if (config.query_verification_enabled) {
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::NO_OPERATOR_CACHING, stmt));
#endif
}
#endif

// Verify external always needs to be explicitly enabled and is never part of default verifier set
if (config.verify_external) {
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::EXTERNAL, stmt));
}
Expand Down
Loading
Loading