-
Notifications
You must be signed in to change notification settings - Fork 71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move PG code out of filter logic #487
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#pragma once | ||
|
||
#include "pgduckdb/pg/declarations.hpp" | ||
|
||
namespace pgduckdb::pg { | ||
int GetDetoastedDatumLen(const Datum value, bool is_bpchar); | ||
const char* GetDetoastedDatumVal(const Datum value); | ||
|
||
bool DatumGetBool(Datum datum); | ||
char DatumGetChar(Datum datum); | ||
int16_t DatumGetInt16(Datum datum); | ||
int32_t DatumGetInt32(Datum datum); | ||
int64_t DatumGetInt64(Datum datum); | ||
float DatumGetFloat4(Datum datum); | ||
double DatumGetFloat8(Datum datum); | ||
int32_t DatumGetDateADT(Datum datum); | ||
int64_t DatumGetTimestamp(Datum datum); | ||
int64_t DatumGetTimestampTz(Datum datum); | ||
|
||
} // namespace pgduckdb::pg |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,4 +66,6 @@ struct TableAmRoutine; | |
typedef uint32_t CommandId; | ||
|
||
typedef uint32_t SubTransactionId; | ||
|
||
struct varlena; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#include "pgduckdb/pg/datum.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "utils/builtins.h" | ||
#include "utils/date.h" | ||
#include "utils/timestamp.h" | ||
#if PG_VERSION_NUM >= 160000 | ||
#include "varatt.h" | ||
#endif | ||
} | ||
|
||
namespace pgduckdb::pg { | ||
int GetDetoastedDatumLen(const Datum value, bool is_bpchar) { | ||
return is_bpchar ? bpchartruelen(VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value)) | ||
: VARSIZE_ANY_EXHDR(value); | ||
} | ||
const char* GetDetoastedDatumVal(const Datum value) { | ||
return static_cast<const char *>(VARDATA_ANY(value)); | ||
} | ||
|
||
bool DatumGetBool(Datum datum) { | ||
return ::DatumGetBool(datum); | ||
} | ||
|
||
char DatumGetChar(Datum datum) { | ||
return ::DatumGetChar(datum); | ||
} | ||
|
||
int16_t DatumGetInt16(Datum datum) { | ||
return ::DatumGetInt16(datum); | ||
} | ||
|
||
int32_t DatumGetInt32(Datum datum) { | ||
return ::DatumGetInt32(datum); | ||
} | ||
|
||
int64_t DatumGetInt64(Datum datum) { | ||
return ::DatumGetInt64(datum); | ||
} | ||
|
||
float DatumGetFloat4(Datum datum) { | ||
return ::DatumGetFloat4(datum); | ||
} | ||
|
||
double DatumGetFloat8(Datum datum) { | ||
return ::DatumGetFloat8(datum); | ||
} | ||
|
||
int32_t DatumGetDateADT(Datum datum) { | ||
return ::DatumGetDateADT(datum); | ||
} | ||
|
||
int64_t DatumGetTimestamp(Datum datum) { | ||
return ::DatumGetTimestamp(datum); | ||
} | ||
|
||
int64_t DatumGetTimestampTz(Datum datum) { | ||
return ::DatumGetTimestampTz(datum); | ||
} | ||
} // namespace pgduckdb::pg |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,21 @@ | ||
#include "pgduckdb/pgduckdb_filter.hpp" | ||
|
||
#include "duckdb.hpp" | ||
#include "duckdb/planner/filter/constant_filter.hpp" | ||
#include "pgduckdb/pgduckdb_types.hpp" | ||
#include "pgduckdb/pg/declarations.hpp" | ||
#include "pgduckdb/pg/datum.hpp" | ||
#include "pgduckdb/pgduckdb_detoast.hpp" | ||
|
||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "catalog/pg_type.h" | ||
#include "utils/builtins.h" | ||
#include "utils/date.h" | ||
#include "utils/timestamp.h" | ||
#if PG_VERSION_NUM >= 160000 | ||
#include "varatt.h" | ||
#endif | ||
// Exceptional include of PG Header which doesn't contain any function declaration | ||
// only macro definitions and types. | ||
#include "catalog/pg_type_d.h" | ||
} | ||
|
||
#include "pgduckdb/pgduckdb_filter.hpp" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought there were filters we can't push down? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, but those we then also shouldn't receive from DuckDB. They should simply be executed by DuckDB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yeah because everything will be emitted to DuckDB values after 477 lands. I think you're right. I'll keep this one on standby but will move to another file. |
||
#include "pgduckdb/pgduckdb_detoast.hpp" | ||
#include "pgduckdb/utility/cpp_only_file.hpp" // Must be last include. | ||
|
||
|
||
namespace pgduckdb { | ||
|
||
|
@@ -26,7 +27,7 @@ TemplatedFilterOperation(const T &value, const duckdb::Value &constant) { | |
|
||
template <class OP> | ||
bool | ||
StringFilterOperation(const Datum &value, const duckdb::Value &constant, bool is_bpchar) { | ||
StringFilterOperation(const Datum value, const duckdb::Value &constant, bool is_bpchar) { | ||
if (value == (Datum)0 || constant.IsNull()) { | ||
return false; // Comparison to NULL always returns false. | ||
} | ||
|
@@ -35,10 +36,10 @@ StringFilterOperation(const Datum &value, const duckdb::Value &constant, bool is | |
const auto detoasted_value = DetoastPostgresDatum(reinterpret_cast<varlena *>(value), &should_free); | ||
|
||
/* bpchar adds zero padding so we need to read true len of bpchar */ | ||
auto detoasted_val_len = is_bpchar ? bpchartruelen(VARDATA_ANY(detoasted_value), VARSIZE_ANY_EXHDR(detoasted_value)) | ||
: VARSIZE_ANY_EXHDR(detoasted_value); | ||
const auto detoasted_val_len = pg::GetDetoastedDatumLen(detoasted_value, is_bpchar); | ||
const auto detoasted_val = pg::GetDetoastedDatumVal(detoasted_value); | ||
|
||
const auto datum_sv = std::string_view((const char *)VARDATA_ANY(detoasted_value), detoasted_val_len); | ||
const auto datum_sv = std::string_view(detoasted_val, detoasted_val_len); | ||
const auto val = duckdb::StringValue::Get(constant); | ||
const auto val_sv = std::string_view(val); | ||
const bool res = OP::Operation(datum_sv, val_sv); | ||
|
@@ -54,29 +55,29 @@ static bool | |
FilterOperationSwitch(const Datum &value, const duckdb::Value &constant, Oid type_oid) { | ||
switch (type_oid) { | ||
case BOOLOID: | ||
return TemplatedFilterOperation<bool, OP>(DatumGetBool(value), constant); | ||
return TemplatedFilterOperation<bool, OP>(pg::DatumGetBool(value), constant); | ||
case CHAROID: | ||
return TemplatedFilterOperation<uint8_t, OP>(DatumGetChar(value), constant); | ||
return TemplatedFilterOperation<uint8_t, OP>(pg::DatumGetChar(value), constant); | ||
case INT2OID: | ||
return TemplatedFilterOperation<int16_t, OP>(DatumGetInt16(value), constant); | ||
return TemplatedFilterOperation<int16_t, OP>(pg::DatumGetInt16(value), constant); | ||
case INT4OID: | ||
return TemplatedFilterOperation<int32_t, OP>(DatumGetInt32(value), constant); | ||
return TemplatedFilterOperation<int32_t, OP>(pg::DatumGetInt32(value), constant); | ||
case INT8OID: | ||
return TemplatedFilterOperation<int64_t, OP>(DatumGetInt64(value), constant); | ||
return TemplatedFilterOperation<int64_t, OP>(pg::DatumGetInt64(value), constant); | ||
case FLOAT4OID: | ||
return TemplatedFilterOperation<float, OP>(DatumGetFloat4(value), constant); | ||
return TemplatedFilterOperation<float, OP>(pg::DatumGetFloat4(value), constant); | ||
case FLOAT8OID: | ||
return TemplatedFilterOperation<double, OP>(DatumGetFloat8(value), constant); | ||
return TemplatedFilterOperation<double, OP>(pg::DatumGetFloat8(value), constant); | ||
case DATEOID: { | ||
int32_t date = DatumGetDateADT(value) + pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET; | ||
int32_t date = pg::DatumGetDateADT(value) + pgduckdb::PGDUCKDB_DUCK_DATE_OFFSET; | ||
return TemplatedFilterOperation<int32_t, OP>(date, constant); | ||
} | ||
case TIMESTAMPOID: { | ||
int64_t timestamp = DatumGetTimestamp(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; | ||
int64_t timestamp = pg::DatumGetTimestamp(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; | ||
return TemplatedFilterOperation<int64_t, OP>(timestamp, constant); | ||
} | ||
case TIMESTAMPTZOID: { | ||
int64_t timestamptz = DatumGetTimestampTz(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; | ||
int64_t timestamptz = pg::DatumGetTimestampTz(value) + pgduckdb::PGDUCKDB_DUCK_TIMESTAMP_OFFSET; | ||
return TemplatedFilterOperation<int64_t, OP>(timestamptz, constant); | ||
} | ||
case BPCHAROID: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm kinda worried that for these tiny functions the call overhead of this could be significant. Postgres defines all these as
static inline
functions.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be clear, I'm thinking that maybe we should vendor in the required functions from Postgres in our own header instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I was thinking the same - since we decided we didn't really care in a previous PR I went that way, but I was about to ping you exactly for that :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, for some other places I wasn't too worried. But these seem like they're likely to be used in the hot-path. Still even the indirection might be fine there.