Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix column ordering in Python, null handling for computed columns #907

Merged
merged 4 commits into from
Feb 8, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
consistently handle nulls in computed columns
  • Loading branch information
sc1f committed Feb 5, 2020
commit f2a508036b08b4760103c8755e8419d33a3f8374
117 changes: 89 additions & 28 deletions cpp/perspective/src/cpp/computed_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ using float64 = double;
#define POW(T) \
t_tscalar pow_##T(t_tscalar x) { \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
rval.set(static_cast<float64>( \
pow(static_cast<float64>(x.get<T>()), 2) \
)); \
Expand All @@ -51,6 +52,7 @@ using float64 = double;
#define INVERT(T) \
t_tscalar invert_##T(t_tscalar x) { \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
float64 rhs = static_cast<float64>(x.get<T>()); \
if (rhs != 0) rval.set(static_cast<float64>(1 / rhs)); \
return rval; \
Expand All @@ -59,22 +61,25 @@ using float64 = double;
// SQRT(T): generates `sqrt_<T>(t_tscalar x)`.
//
// The generated function returns the scalar produced by `mknone()` when `x`
// reports `is_none()` or fails `is_valid()`. Otherwise it reads `x` as `T`,
// widens it to float64, and stores `sqrt` of that value in the result.
// No sign check is performed before calling `sqrt`.
#define SQRT(T)                                                             \
    t_tscalar sqrt_##T(t_tscalar x) {                                       \
        t_tscalar result = mknone();                                        \
        const bool usable = !x.is_none() && x.is_valid();                   \
        if (usable) {                                                       \
            const float64 operand = static_cast<float64>(x.get<T>());       \
            result.set(static_cast<float64>(sqrt(operand)));                \
        }                                                                   \
        return result;                                                      \
    }

// ABS(T): generates `abs_<T>(t_tscalar x)`.
//
// The generated function returns the scalar produced by `mknone()` when `x`
// reports `is_none()` or fails `is_valid()`. Otherwise it reads `x` as `T`,
// widens it to float64, and stores the absolute value as a float64.
//
// `rval` is declared exactly once and starts out as `mknone()`, so the
// early-return path hands back the same "none" scalar used by the other
// numeric helpers in this file.
#define ABS(T)                                                              \
    t_tscalar abs_##T(t_tscalar x) {                                        \
        t_tscalar rval = mknone();                                          \
        if (x.is_none() || !x.is_valid()) return rval;                      \
        rval.set(static_cast<float64>(                                      \
            std::abs(static_cast<float64>(x.get<T>()))));                   \
        return rval;                                                        \
    }

#define BUCKET_10(T) \
t_tscalar bucket_10_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 10)) * 10); \
Expand All @@ -83,7 +88,8 @@ using float64 = double;

#define BUCKET_100(T) \
t_tscalar bucket_100_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 100)) * 100); \
Expand All @@ -92,7 +98,8 @@ using float64 = double;

#define BUCKET_1000(T) \
t_tscalar bucket_1000_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 1000)) * 1000); \
Expand All @@ -101,7 +108,8 @@ using float64 = double;

#define BUCKET_0_1(T) \
t_tscalar bucket_0_1_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 0.1)) * 0.1); \
Expand All @@ -110,7 +118,8 @@ using float64 = double;

#define BUCKET_0_0_1(T) \
t_tscalar bucket_0_0_1_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 0.01)) * 0.01); \
Expand All @@ -119,7 +128,8 @@ using float64 = double;

#define BUCKET_0_0_0_1(T) \
t_tscalar bucket_0_0_0_1_##T(t_tscalar x) { \
t_tscalar rval; \
t_tscalar rval = mknone(); \
if (x.is_none() || !x.is_valid()) return rval; \
T val = x.get<T>(); \
rval.set(static_cast<float64>( \
floor(static_cast<float64>(val) / 0.001)) * 0.001); \
Expand Down Expand Up @@ -244,28 +254,36 @@ NUMERIC_FUNCTION_1(BUCKET_0_0_0_1);

// ADD(T1, T2): generates `add_<T1>_<T2>(t_tscalar x, t_tscalar y)`.
//
// The generated function returns the scalar produced by `mknone()` when
// either operand reports `is_none()` or fails `is_valid()`. Otherwise it
// reads `x` as `T1` and `y` as `T2`, adds them, and stores the sum as a
// float64.
//
// `rval` is declared exactly once and starts out as `mknone()`.
#define ADD(T1, T2)                                                         \
    t_tscalar add_##T1##_##T2(t_tscalar x, t_tscalar y) {                   \
        t_tscalar rval = mknone();                                          \
        if ((x.is_none() || !x.is_valid())                                  \
            || (y.is_none() || !y.is_valid())) return rval;                 \
        rval.set(static_cast<float64>(x.get<T1>() + y.get<T2>()));          \
        return rval;                                                        \
    }

// SUBTRACT(T1, T2): generates `subtract_<T1>_<T2>(t_tscalar x, t_tscalar y)`.
//
// The generated function returns the scalar produced by `mknone()` when
// either operand reports `is_none()` or fails `is_valid()`. Otherwise it
// reads `x` as `T1` and `y` as `T2`, computes `x - y`, and stores the
// difference as a float64.
//
// `rval` is declared exactly once and starts out as `mknone()`.
//
// NOTE(review): the subtraction is evaluated in the common type of T1 and T2
// before the widening cast. If this macro is instantiated for 32/64-bit
// unsigned types, `x - y` would wrap when `y > x` - confirm against the
// dispatch list and consider casting each operand to float64 first.
#define SUBTRACT(T1, T2)                                                    \
    t_tscalar subtract_##T1##_##T2(t_tscalar x, t_tscalar y) {              \
        t_tscalar rval = mknone();                                          \
        if ((x.is_none() || !x.is_valid())                                  \
            || (y.is_none() || !y.is_valid())) return rval;                 \
        rval.set(static_cast<float64>(x.get<T1>() - y.get<T2>()));          \
        return rval;                                                        \
    }

// MULTIPLY(T1, T2): generates `multiply_<T1>_<T2>(t_tscalar x, t_tscalar y)`.
//
// The generated function returns the scalar produced by `mknone()` when
// either operand reports `is_none()` or fails `is_valid()`. Otherwise it
// reads `x` as `T1` and `y` as `T2`, multiplies them, and stores the product
// as a float64.
//
// `rval` is declared exactly once and starts out as `mknone()`.
#define MULTIPLY(T1, T2)                                                    \
    t_tscalar multiply_##T1##_##T2(t_tscalar x, t_tscalar y) {              \
        t_tscalar rval = mknone();                                          \
        if ((x.is_none() || !x.is_valid())                                  \
            || (y.is_none() || !y.is_valid())) return rval;                 \
        rval.set(static_cast<float64>(x.get<T1>() * y.get<T2>()));          \
        return rval;                                                        \
    }

#define DIVIDE(T1, T2) \
t_tscalar divide_##T1##_##T2(t_tscalar x, t_tscalar y) { \
t_tscalar rval = mknone(); \
if ((x.is_none() || !x.is_valid()) \
|| (y.is_none() || !y.is_valid())) return rval; \
float64 lhs = static_cast<float64>(x.get<T1>()); \
float64 rhs = static_cast<float64>(y.get<T2>()); \
if (rhs != 0) rval.set(static_cast<float64>(lhs / rhs)); \
Expand All @@ -275,6 +293,8 @@ NUMERIC_FUNCTION_1(BUCKET_0_0_0_1);
#define PERCENT_OF(T1, T2) \
t_tscalar percent_of_##T1##_##T2(t_tscalar x, t_tscalar y) { \
t_tscalar rval = mknone(); \
if ((x.is_none() || !x.is_valid()) \
|| (y.is_none() || !y.is_valid())) return rval; \
float64 lhs = static_cast<float64>(x.get<T1>()); \
float64 rhs = static_cast<float64>(y.get<T2>()); \
if (rhs != 0) rval.set(static_cast<float64>(lhs / rhs) * 100); \
Expand Down Expand Up @@ -336,7 +356,7 @@ NUMERIC_FUNCTION_2_DISPATCH_ALL_TYPES(percent_of);
t_tscalar length(t_tscalar x) {
t_tscalar rval = mknone();

if (x.get_dtype() != DTYPE_STR) {
if (x.is_none() || !x.is_valid() || x.get_dtype() != DTYPE_STR) {
return rval;
}

Expand All @@ -346,7 +366,7 @@ t_tscalar length(t_tscalar x) {
}

void uppercase(t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.get_dtype() != DTYPE_STR) {
if (x.is_none() || !x.is_valid() || x.get_dtype() != DTYPE_STR) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
Expand All @@ -358,7 +378,7 @@ void uppercase(t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_c
}

void lowercase(t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.get_dtype() != DTYPE_STR) {
if (x.is_none() || !x.is_valid() || x.get_dtype() != DTYPE_STR) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
Expand All @@ -370,7 +390,8 @@ void lowercase(t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_c
}

void concat_space(t_tscalar x, t_tscalar y, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.get_dtype() != DTYPE_STR || y.get_dtype() != DTYPE_STR) {
if ((x.is_none() || !x.is_valid() || x.get_dtype() != DTYPE_STR)
|| (y.is_none() || !y.is_valid() || y.get_dtype() != DTYPE_STR)) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
Expand All @@ -380,7 +401,8 @@ void concat_space(t_tscalar x, t_tscalar y, std::int32_t idx, std::shared_ptr<t_
}

void concat_comma(t_tscalar x, t_tscalar y, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.get_dtype() != DTYPE_STR || y.get_dtype() != DTYPE_STR) {
if ((x.is_none() || !x.is_valid() || x.get_dtype() != DTYPE_STR)
|| (y.is_none() || !y.is_valid() || y.get_dtype() != DTYPE_STR)) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
Expand All @@ -393,15 +415,17 @@ void concat_comma(t_tscalar x, t_tscalar y, std::int32_t idx, std::shared_ptr<t_
// Date/Datetime functions
/**
 * Hour-of-day for a date-typed scalar.
 *
 * @param x input scalar.
 * @return the scalar produced by `mknone()` when `x` reports `is_none()` or
 *         fails `is_valid()`; otherwise a scalar holding the int64 value 0.
 *
 * `rval` is declared exactly once and starts out as `mknone()`.
 */
template<>
t_tscalar hour_of_day<DTYPE_DATE>(t_tscalar x) {
    t_tscalar rval = mknone();
    if (x.is_none() || !x.is_valid()) return rval;
    // Hour of day for a date is always midnight, i.e. 0
    rval.set(static_cast<std::int64_t>(0));
    return rval;
}

template<>
t_tscalar hour_of_day<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());
Expand All @@ -422,13 +446,15 @@ t_tscalar hour_of_day<DTYPE_TIME>(t_tscalar x) {

/**
 * Second-bucket for a date-typed scalar.
 *
 * @param x input scalar.
 * @return `x` unchanged when it is neither none nor invalid; otherwise the
 *         scalar produced by `mknone()`.
 */
template <>
t_tscalar
second_bucket<DTYPE_DATE>(t_tscalar x) {
    const bool usable = !x.is_none() && x.is_valid();
    if (usable) {
        return x;
    }
    return mknone();
}

template<>
t_tscalar second_bucket<DTYPE_TIME>(t_tscalar x) {
// Retrieve the timestamp as an integer and bucket it
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;
auto int_ts = x.to_int64();
std::int64_t bucketed_ts = (static_cast<double>(int_ts) / 1000) * 1000;
rval.set(t_time(bucketed_ts));
Expand All @@ -437,12 +463,14 @@ t_tscalar second_bucket<DTYPE_TIME>(t_tscalar x) {

/**
 * Minute-bucket for a date-typed scalar.
 *
 * @param x input scalar.
 * @return `x` unchanged when it is neither none nor invalid; otherwise the
 *         scalar produced by `mknone()`.
 */
template <>
t_tscalar
minute_bucket<DTYPE_DATE>(t_tscalar x) {
    const bool usable = !x.is_none() && x.is_valid();
    if (usable) {
        return x;
    }
    return mknone();
}

template<>
t_tscalar minute_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds ms_timestamp(x.to_int64());
Expand All @@ -458,12 +486,14 @@ t_tscalar minute_bucket<DTYPE_TIME>(t_tscalar x) {

/**
 * Hour-bucket for a date-typed scalar.
 *
 * @param x input scalar.
 * @return `x` unchanged when it is neither none nor invalid; otherwise the
 *         scalar produced by `mknone()`.
 */
template <>
t_tscalar
hour_bucket<DTYPE_DATE>(t_tscalar x) {
    const bool usable = !x.is_none() && x.is_valid();
    if (usable) {
        return x;
    }
    return mknone();
}

template<>
t_tscalar hour_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a millisecond duration timestamp
std::chrono::milliseconds ms_timestamp(x.to_int64());
Expand All @@ -479,12 +509,14 @@ t_tscalar hour_bucket<DTYPE_TIME>(t_tscalar x) {

/**
 * Day-bucket for a date-typed scalar.
 *
 * @param x input scalar.
 * @return `x` unchanged when it is neither none nor invalid; otherwise the
 *         scalar produced by `mknone()`.
 */
template <>
t_tscalar
day_bucket<DTYPE_DATE>(t_tscalar x) {
    const bool usable = !x.is_none() && x.is_valid();
    if (usable) {
        return x;
    }
    return mknone();
}

template<>
t_tscalar day_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds ms_timestamp(x.to_int64());
Expand All @@ -510,7 +542,8 @@ t_tscalar day_bucket<DTYPE_TIME>(t_tscalar x) {

template<>
t_tscalar week_bucket<DTYPE_DATE>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Retrieve the `t_date` struct from the scalar
t_date val = x.get<t_date>();
Expand Down Expand Up @@ -544,7 +577,8 @@ t_tscalar week_bucket<DTYPE_DATE>(t_tscalar x) {

template<>
t_tscalar week_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());
Expand Down Expand Up @@ -573,15 +607,17 @@ t_tscalar week_bucket<DTYPE_TIME>(t_tscalar x) {

/**
 * Month-bucket for a date-typed scalar.
 *
 * @param x input scalar; read as a `t_date` when usable.
 * @return the scalar produced by `mknone()` when `x` reports `is_none()` or
 *         fails `is_valid()`; otherwise a scalar holding a `t_date` built
 *         from the input's year and month with the last component set to 1
 *         (presumably the day-of-month - confirm against `t_date`'s
 *         constructor).
 *
 * `rval` is declared exactly once and starts out as `mknone()`.
 */
template<>
t_tscalar month_bucket<DTYPE_DATE>(t_tscalar x) {
    t_tscalar rval = mknone();
    if (x.is_none() || !x.is_valid()) return rval;
    t_date val = x.get<t_date>();
    rval.set(t_date(val.year(), val.month(), 1));
    return rval;
}

template<>
t_tscalar month_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());
Expand All @@ -608,16 +644,17 @@ t_tscalar month_bucket<DTYPE_TIME>(t_tscalar x) {

/**
 * Year-bucket for a date-typed scalar.
 *
 * @param x input scalar; read as a `t_date` when usable.
 * @return the scalar produced by `mknone()` when `x` reports `is_none()` or
 *         fails `is_valid()`; otherwise a scalar holding a `t_date` built
 *         from the input's year with the remaining components set to 0
 *         and 1 (presumably month index 0 and day 1 - confirm against
 *         `t_date`'s constructor).
 *
 * `rval` is declared exactly once and starts out as `mknone()`. The function
 * does not write to stdout: a leftover debug print of `rval` was dropped so
 * that calling it has no I/O side effect.
 */
template<>
t_tscalar year_bucket<DTYPE_DATE>(t_tscalar x) {
    t_tscalar rval = mknone();
    if (x.is_none() || !x.is_valid()) return rval;
    t_date val = x.get<t_date>();
    rval.set(t_date(val.year(), 0, 1));
    return rval;
}

template<>
t_tscalar year_bucket<DTYPE_TIME>(t_tscalar x) {
t_tscalar rval;
t_tscalar rval = mknone();
if (x.is_none() || !x.is_valid()) return rval;

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());
Expand Down Expand Up @@ -666,6 +703,12 @@ const std::string months_of_year[12] = {
template <>
void day_of_week<DTYPE_DATE>(
t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.is_none() || !x.is_valid()) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
}

// Retrieve the `t_date` struct from the scalar
t_date val = x.get<t_date>();

Expand All @@ -691,6 +734,12 @@ void day_of_week<DTYPE_DATE>(
template <>
void day_of_week<DTYPE_TIME>(
t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.is_none() || !x.is_valid()) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
}

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());

Expand All @@ -710,6 +759,12 @@ void day_of_week<DTYPE_TIME>(
template <>
void month_of_year<DTYPE_DATE>(
t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.is_none() || !x.is_valid()) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
}

t_date val = x.get<t_date>();

// `t_date.month()` is [0-11]
Expand All @@ -721,6 +776,12 @@ void month_of_year<DTYPE_DATE>(
template <>
void month_of_year<DTYPE_TIME>(
t_tscalar x, std::int32_t idx, std::shared_ptr<t_column> output_column) {
if (x.is_none() || !x.is_valid()) {
output_column->set_scalar(idx, mknone());
output_column->set_valid(idx, false);
return;
}

// Convert the int64 to a milliseconds duration timestamp
std::chrono::milliseconds timestamp(x.to_int64());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var Borders = cellRenderersRegistry.BaseClass.extend("Borders", {
var color;

gc.save();
gc.translate(-0.5, 0.5); // paint "sharp" lines on pixels instead of "blury" lines between pixels
gc.translate(-0.5, 0.5); // paint "sharp" lines on pixels instead of "blurry" lines between pixels
gc.cache.lineWidth = 1;

color = config.borderTop;
Expand Down
Loading