Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve bitpacking skip performance + better testing of FetchRow #10295

Merged
merged 10 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
improve bp skip and fix bp bug
  • Loading branch information
samansmink committed Jan 16, 2024
commit b3d7193ac5c0243222c93042fa1011218a26b4db
9 changes: 8 additions & 1 deletion scripts/run_tests_one_by_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
no_exit = False
profile = False
assertions = True
force_storage_flag = ''

for i in range(len(sys.argv)):
if sys.argv[i] == '--no-exit':
Expand All @@ -21,17 +22,23 @@
assertions = False
del sys.argv[i]
i -= 1
elif sys.argv[i] == '--force-storage':
force_storage_flag = '--force-storage'
del sys.argv[i]
i -= 1

if len(sys.argv) < 2:
print(
"Expected usage: python3 scripts/run_tests_one_by_one.py build/debug/test/unittest [--no-exit] [--profile] [--no-assertions]"
"Expected usage: python3 scripts/run_tests_one_by_one.py build/debug/test/unittest [--no-exit] [--profile] [--no-assertions] [--force-storage] [--add-timing]"
)
exit(1)
unittest_program = sys.argv[1]
extra_args = []
if len(sys.argv) > 2:
extra_args = [sys.argv[2]]

if force_storage_flag != '':
extra_args.append(force_storage_flag)

proc = subprocess.Popen([unittest_program, '-l'] + extra_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout = proc.stdout.read().decode('utf8')
Expand Down
10 changes: 10 additions & 0 deletions src/function/table/table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "duckdb/storage/table/scan_state.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/main/client_data.hpp"

namespace duckdb {

Expand All @@ -30,6 +31,8 @@ struct TableScanLocalState : public LocalTableFunctionState {
TableScanState scan_state;
//! The DataChunk containing all read columns (even filter columns that are immediately removed)
DataChunk all_columns;
//! (Debug) settings on how to perform the scan
TableScanOptions scan_options;
};

static storage_t GetStorageIndex(TableCatalogEntry &table, column_t column_id) {
Expand Down Expand Up @@ -77,6 +80,9 @@ static unique_ptr<LocalTableFunctionState> TableScanInitLocal(ExecutionContext &
auto &tsgs = gstate->Cast<TableScanGlobalState>();
result->all_columns.Initialize(context.client, tsgs.scanned_types);
}

result->scan_options.force_fetch_row = ClientConfig::GetConfig(context.client).force_fetch_row;

return std::move(result);
}

Expand Down Expand Up @@ -117,6 +123,8 @@ static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, Da
auto &state = data_p.local_state->Cast<TableScanLocalState>();
auto &transaction = DuckTransaction::Get(context, bind_data.table.catalog);
auto &storage = bind_data.table.GetStorage();

state.scan_state.options.force_fetch_row = ClientConfig::GetConfig(context).force_fetch_row;
do {
if (bind_data.is_create_index) {
storage.CreateIndexScan(state.scan_state, output,
Expand Down Expand Up @@ -222,6 +230,8 @@ static unique_ptr<GlobalTableFunctionState> IndexScanInitGlobal(ClientContext &c
auto result = make_uniq<IndexScanGlobalState>(row_id_data);
auto &local_storage = LocalStorage::Get(context, bind_data.table.catalog);

result->local_storage_state.options.force_fetch_row = ClientConfig::GetConfig(context).force_fetch_row;

result->column_ids.reserve(input.column_ids.size());
for (auto &id : input.column_ids) {
result->column_ids.push_back(GetStorageIndex(bind_data.table, id));
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/main/client_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct ClientConfig {
bool force_no_cross_product = false;
//! Force use of IEJoin to implement AsOfJoin, used for testing
bool force_asof_iejoin = false;
//! Force use of fetch row instead of scan, used for testing
bool force_fetch_row = false;
//! Use range joins for inequalities, even if there are equality predicates
bool prefer_range_joins = false;
//! If this context should also try to use the available replacement scans
Expand Down
1 change: 1 addition & 0 deletions src/include/duckdb/storage/table/column_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class RowGroupWriter;
class TableDataWriter;
class TableStorageInfo;
struct TransactionData;
struct TableScanOptions;

struct DataTableInfo;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb/storage/table/column_checkpoint_state.hpp"

namespace duckdb {
struct TableScanOptions;

class ColumnDataCheckpointer {
public:
Expand All @@ -30,7 +31,7 @@ class ColumnDataCheckpointer {
CompressionFunction &GetCompressionFunction(CompressionType type);

private:
void ScanSegments(const std::function<void(Vector &, idx_t)> &callback);
void ScanSegments(const std::function<void(Vector &, idx_t)> &callback, TableScanOptions &options);
unique_ptr<AnalyzeState> DetectBestCompressionMethod(idx_t &compression_idx);
void WriteToDisk();
bool HasChanges();
Expand Down
13 changes: 12 additions & 1 deletion src/include/duckdb/storage/table/scan_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class TableFilterSet;
class ColumnData;
class DuckTransaction;
class RowGroupSegmentTree;
struct TableScanOptions;

struct SegmentScanState {
virtual ~SegmentScanState() {
Expand Down Expand Up @@ -92,9 +93,11 @@ struct ColumnScanState {
vector<unique_ptr<SegmentScanState>> previous_states;
//! The last read offset in the child state (used for LIST columns only)
idx_t last_offset = 0;
//! Contains TableScan level config for scanning
optional_ptr<TableScanOptions> scan_options;

public:
void Initialize(const LogicalType &type);
void Initialize(const LogicalType &type, optional_ptr<TableScanOptions> options);
//! Move the scan state forward by "count" rows (including all child states)
void Next(idx_t count);
//! Move ONLY this state forward by "count" rows (i.e. not the child states)
Expand Down Expand Up @@ -134,6 +137,7 @@ class CollectionScanState {
const vector<storage_t> &GetColumnIds();
TableFilterSet *GetFilters();
AdaptiveFilter *GetAdaptiveFilter();
TableScanOptions &GetOptions();
bool Scan(DuckTransaction &transaction, DataChunk &result);
bool ScanCommitted(DataChunk &result, TableScanType type);
bool ScanCommitted(DataChunk &result, SegmentLock &l, TableScanType type);
Expand All @@ -142,6 +146,11 @@ class CollectionScanState {
TableScanState &parent;
};

//! Debug/test-only configuration that alters how a table scan is executed.
//! Propagated from ClientConfig into TableScanState/ColumnScanState at scan init.
struct TableScanOptions {
//! Test config that forces fetching rows one by one (FetchRow path) instead of regular vectorized scans
bool force_fetch_row = false;
};

class TableScanState {
public:
TableScanState() : table_state(*this), local_state(*this), table_filters(nullptr) {};
Expand All @@ -150,6 +159,8 @@ class TableScanState {
CollectionScanState table_state;
//! Transaction-local scan state
CollectionScanState local_state;
//! Options for scanning
TableScanOptions options;

public:
void Initialize(vector<storage_t> column_ids, TableFilterSet *table_filters = nullptr);
Expand Down
25 changes: 25 additions & 0 deletions src/include/duckdb/verification/fetch_row_verifier.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/verification/fetch_row_verifier.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/verification/statement_verifier.hpp"

namespace duckdb {

//! Statement verifier that re-executes a query with row-by-row fetching
//! forced (ForceFetchRow), so results can be cross-checked against the
//! regular vectorized scan path. Used for testing only.
class FetchRowVerifier : public StatementVerifier {
public:
	explicit FetchRowVerifier(unique_ptr<SQLStatement> statement_p);
	//! Factory invoked by StatementVerifier::Create for VerificationType::FETCH_ROW_AS_SCAN
	static unique_ptr<StatementVerifier> Create(const SQLStatement &statement_p);

	//! Signals that scans should be forced through the FetchRow code path while this verifier runs
	bool ForceFetchRow() const override {
		return true;
	}
};

} // namespace duckdb
5 changes: 5 additions & 0 deletions src/include/duckdb/verification/statement_verifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ enum class VerificationType : uint8_t {
NO_OPERATOR_CACHING,
PREPARED,
EXTERNAL,
FETCH_ROW_AS_SCAN,

INVALID
};
Expand Down Expand Up @@ -67,6 +68,10 @@ class StatementVerifier {
virtual bool ForceExternal() const {
return false;
}

virtual bool ForceFetchRow() const {
return false;
}
};

} // namespace duckdb
1 change: 1 addition & 0 deletions src/main/client_verify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::COPIED, stmt));
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED, stmt));
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::UNOPTIMIZED, stmt));
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::FETCH_ROW_AS_SCAN, stmt));
prepared_statement_verifier = StatementVerifier::Create(VerificationType::PREPARED, stmt);
#ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
// This verification is quite slow, so we only run it for the async sink/source debug mode
Expand Down
Loading