Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move PG code out of filter logic #487

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions include/pgduckdb/pg/datum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include "pgduckdb/pg/declarations.hpp"

namespace pgduckdb::pg {
int GetDetoastedDatumLen(const Datum value, bool is_bpchar);
const char* GetDetoastedDatumVal(const Datum value);

bool DatumGetBool(Datum datum);
char DatumGetChar(Datum datum);
int16_t DatumGetInt16(Datum datum);
int32_t DatumGetInt32(Datum datum);
int64_t DatumGetInt64(Datum datum);
float DatumGetFloat4(Datum datum);
double DatumGetFloat8(Datum datum);
int32_t DatumGetDateADT(Datum datum);
int64_t DatumGetTimestamp(Datum datum);
int64_t DatumGetTimestampTz(Datum datum);

} // namespace pgduckdb::pg
2 changes: 2 additions & 0 deletions include/pgduckdb/pg/declarations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,6 @@ struct TableAmRoutine;
typedef uint32_t CommandId;

typedef uint32_t SubTransactionId;

struct varlena;
}
5 changes: 2 additions & 3 deletions include/pgduckdb/pgduckdb_detoast.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#pragma once

extern "C" {
#include "postgres.h"
}
#include "pgduckdb/pg/declarations.hpp"
#include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include.

namespace pgduckdb {

Expand Down
11 changes: 7 additions & 4 deletions include/pgduckdb/pgduckdb_filter.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#pragma once

#include "duckdb.hpp"
#include "pgduckdb/pg/declarations.hpp"

extern "C" {
#include "postgres.h"
}
#include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include.


namespace duckdb {
class TableFilter;
} // namespace duckdb

namespace pgduckdb {

Expand Down
61 changes: 61 additions & 0 deletions src/pg/datum.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include "pgduckdb/pg/datum.hpp"

extern "C" {
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/timestamp.h"
#if PG_VERSION_NUM >= 160000
#include "varatt.h"
#endif
}

namespace pgduckdb::pg {
int GetDetoastedDatumLen(const Datum value, bool is_bpchar) {
return is_bpchar ? bpchartruelen(VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value))
: VARSIZE_ANY_EXHDR(value);
}
const char* GetDetoastedDatumVal(const Datum value) {
return static_cast<const char *>(VARDATA_ANY(value));
}

bool DatumGetBool(Datum datum) {
return ::DatumGetBool(datum);
}
Comment on lines +22 to +24
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm kinda worried that for these tiny functions the call overhead of this could be significant. Postgres defines all these as static inline functions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, I'm thinking that maybe we should vendor in the required functions from Postgres in our own header instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was thinking the same - since we decided we didn't really care in a previous PR I went that way, but I was about to ping you exactly for that :-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, for some other places I wasn't too worried. But these seem like they're likely to be used in the hot-path. Still even the indirection might be fine there.


char DatumGetChar(Datum datum) {
return ::DatumGetChar(datum);
}

int16_t DatumGetInt16(Datum datum) {
return ::DatumGetInt16(datum);
}

int32_t DatumGetInt32(Datum datum) {
return ::DatumGetInt32(datum);
}

int64_t DatumGetInt64(Datum datum) {
return ::DatumGetInt64(datum);
}

float DatumGetFloat4(Datum datum) {
return ::DatumGetFloat4(datum);
}

double DatumGetFloat8(Datum datum) {
return ::DatumGetFloat8(datum);
}

int32_t DatumGetDateADT(Datum datum) {
return ::DatumGetDateADT(datum);
}

int64_t DatumGetTimestamp(Datum datum) {
return ::DatumGetTimestamp(datum);
}

int64_t DatumGetTimestampTz(Datum datum) {
return ::DatumGetTimestampTz(datum);
}
} // namespace pgduckdb::pg
2 changes: 1 addition & 1 deletion src/pgduckdb_detoast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"

extern "C" {
#include "postgres.h"
Expand All @@ -23,7 +24,6 @@ extern "C" {
}

#include "pgduckdb/pgduckdb_process_lock.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"

/*
* Following functions are direct logic found in postgres code but for duckdb execution they are needed to be thread
Expand Down
49 changes: 25 additions & 24 deletions src/pgduckdb_filter.cpp
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
#include "pgduckdb/pgduckdb_filter.hpp"

#include "duckdb.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pg/declarations.hpp"
#include "pgduckdb/pg/datum.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"


extern "C" {
#include "postgres.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/timestamp.h"
#if PG_VERSION_NUM >= 160000
#include "varatt.h"
#endif
// Exceptional include of PG Header which doesn't contain any function declaration
// only macro definitions and types.
#include "catalog/pg_type_d.h"
}

#include "pgduckdb/pgduckdb_filter.hpp"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mkaruza do we actually still need this filter logic after your #477 change, because there we're pushing those filters to postgres.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought there were filters we can't push down?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but those we then also shouldn't receive from DuckDB. They should simply be executed by DuckDB.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah because everything will be emitted to DuckDB values after 477 lands. I think you're right. I'll keep this one on standby but will move to another file.

#include "pgduckdb/pgduckdb_detoast.hpp"
#include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include.


namespace pgduckdb {

Expand All @@ -26,7 +27,7 @@ TemplatedFilterOperation(const T &value, const duckdb::Value &constant) {

template <class OP>
bool
StringFilterOperation(const Datum &value, const duckdb::Value &constant, bool is_bpchar) {
StringFilterOperation(const Datum value, const duckdb::Value &constant, bool is_bpchar) {
if (value == (Datum)0 || constant.IsNull()) {
return false; // Comparison to NULL always returns false.
}
Expand All @@ -35,10 +36,10 @@ StringFilterOperation(const Datum &value, const duckdb::Value &constant, bool is
const auto detoasted_value = DetoastPostgresDatum(reinterpret_cast<varlena *>(value), &should_free);

/* bpchar adds zero padding so we need to read true len of bpchar */
auto detoasted_val_len = is_bpchar ? bpchartruelen(VARDATA_ANY(detoasted_value), VARSIZE_ANY_EXHDR(detoasted_value))
: VARSIZE_ANY_EXHDR(detoasted_value);
const auto detoasted_val_len = pg::GetDetoastedDatumLen(detoasted_value, is_bpchar);
const auto detoasted_val = pg::GetDetoastedDatumVal(detoasted_value);

const auto datum_sv = std::string_view((const char *)VARDATA_ANY(detoasted_value), detoasted_val_len);
const auto datum_sv = std::string_view(detoasted_val, detoasted_val_len);
const auto val = duckdb::StringValue::Get(constant);
const auto val_sv = std::string_view(val);
const bool res = OP::Operation(datum_sv, val_sv);
Expand All @@ -54,29 +55,29 @@ static bool
FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid type_oid) {
switch (type_oid) {
case BOOLOID:
return TemplatedFilterOperation<bool, OP>(DatumGetBool(value), constant);
return TemplatedFilterOperation<bool, OP>(pg::DatumGetBool(value), constant);
case CHAROID:
return TemplatedFilterOperation<uint8_t, OP>(DatumGetChar(value), constant);
return TemplatedFilterOperation<uint8_t, OP>(pg::DatumGetChar(value), constant);
case INT2OID:
return TemplatedFilterOperation<int16_t, OP>(DatumGetInt16(value), constant);
return TemplatedFilterOperation<int16_t, OP>(pg::DatumGetInt16(value), constant);
case INT4OID:
return TemplatedFilterOperation<int32_t, OP>(DatumGetInt32(value), constant);
return TemplatedFilterOperation<int32_t, OP>(pg::DatumGetInt32(value), constant);
case INT8OID:
return TemplatedFilterOperation<int64_t, OP>(DatumGetInt64(value), constant);
return TemplatedFilterOperation<int64_t, OP>(pg::DatumGetInt64(value), constant);
case FLOAT4OID:
return TemplatedFilterOperation<float, OP>(DatumGetFloat4(value), constant);
return TemplatedFilterOperation<float, OP>(pg::DatumGetFloat4(value), constant);
case FLOAT8OID:
return TemplatedFilterOperation<double, OP>(DatumGetFloat8(value), constant);
return TemplatedFilterOperation<double, OP>(pg::DatumGetFloat8(value), constant);
case DATEOID: {
int32_t date = DatumGetDateADT(value) + pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET;
int32_t date = pg::DatumGetDateADT(value) + pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET;
return TemplatedFilterOperation<int32_t, OP>(date, constant);
}
case TIMESTAMPOID: {
int64_t timestamp = DatumGetTimestamp(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
int64_t timestamp = pg::DatumGetTimestamp(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
return TemplatedFilterOperation<int64_t, OP>(timestamp, constant);
}
case TIMESTAMPTZOID: {
int64_t timestamptz = DatumGetTimestampTz(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
int64_t timestamptz = pg::DatumGetTimestampTz(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET;
return TemplatedFilterOperation<int64_t, OP>(timestamptz, constant);
}
case BPCHAROID:
Expand Down
5 changes: 2 additions & 3 deletions src/pgduckdb_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include "pgduckdb/pgduckdb_types.hpp"
#include "pgduckdb/pgduckdb_utils.hpp"
#include "pgduckdb/scan/postgres_scan.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"
#include "pgduckdb/pgduckdb_filter.hpp"

extern "C" {

Expand All @@ -28,9 +30,6 @@ extern "C" {
#include "utils/timestamp.h"
}

#include "pgduckdb/pgduckdb_filter.hpp"
#include "pgduckdb/pgduckdb_detoast.hpp"

namespace pgduckdb {

NumericVar FromNumeric(Numeric num);
Expand Down
Loading