From 5becd6245fd67b9fc8035a1a14b60482c88dfa9f Mon Sep 17 00:00:00 2001 From: Davis Silverman Date: Tue, 25 Apr 2023 18:42:22 -0400 Subject: [PATCH] ExprTK bucket() dates with a multiplicity. --- cpp/perspective/src/cpp/computed_function.cpp | 191 +++++++++++++----- .../include/perspective/computed_function.h | 12 +- .../perspective/test/js/expressions/common.js | 36 +++- .../test/js/expressions/datetime.spec.js | 164 +++++++++++++++ 4 files changed, 337 insertions(+), 66 deletions(-) diff --git a/cpp/perspective/src/cpp/computed_function.cpp b/cpp/perspective/src/cpp/computed_function.cpp index 4b7db58b98..fa0a69ef79 100644 --- a/cpp/perspective/src/cpp/computed_function.cpp +++ b/cpp/perspective/src/cpp/computed_function.cpp @@ -1121,11 +1121,11 @@ namespace computed_function { return rval; } - tsl::hopscotch_map bucket::UNIT_MAP = { - {"s", t_date_bucket_unit::SECONDS}, {"m", t_date_bucket_unit::MINUTES}, - {"h", t_date_bucket_unit::HOURS}, {"D", t_date_bucket_unit::DAYS}, - {"W", t_date_bucket_unit::WEEKS}, {"M", t_date_bucket_unit::MONTHS}, - {"Y", t_date_bucket_unit::YEARS}}; + tsl::hopscotch_map bucket::UNIT_MAP = { + {'s', t_date_bucket_unit::SECONDS}, {'m', t_date_bucket_unit::MINUTES}, + {'h', t_date_bucket_unit::HOURS}, {'D', t_date_bucket_unit::DAYS}, + {'W', t_date_bucket_unit::WEEKS}, {'M', t_date_bucket_unit::MONTHS}, + {'Y', t_date_bucket_unit::YEARS}}; bucket::bucket() : exprtk::igeneric_function("T?") {} @@ -1175,8 +1175,30 @@ namespace computed_function { t_string_view temp_string(gt_unit); std::string unit_str = std::string(temp_string.begin(), temp_string.end()); - - if (bucket::UNIT_MAP.count(unit_str) == 0) { + char temp_unit = 0; + auto len = unit_str.size(); + unsigned long multiplicity; + t_date_bucket_unit date_unit; + if (len == 0) { + // Does not type-check! + rval.m_status = STATUS_CLEAR; + return rval; + } else if (len == 1) { + // No multiplicity explicity given, defaults to 1. + multiplicity = 1; + temp_unit = unit_str.at(0); + } else { + temp_unit = unit_str.at(len - 1); + std::string mult = unit_str.substr(0, len - 1); + if (!std::all_of(mult.begin(), mult.end(), ::isdigit)) { + // multiplicity is not a non-negative integer + rval.m_status = STATUS_CLEAR; + return rval; + } + multiplicity = std::stoul(mult); + } + std::string allowed_units = "smhDWMY"; + if (allowed_units.find(temp_unit) == std::string::npos) { std::cerr << "[bucket] unknown unit in bucket - the valid units " "are 's', 'm', 'h', 'D', 'W', 'M', and 'Y'." << std::endl; @@ -1184,10 +1206,63 @@ namespace computed_function { rval.m_status = STATUS_CLEAR; return rval; } + date_unit = bucket::UNIT_MAP[temp_unit]; - t_date_bucket_unit date_bucket_unit = bucket::UNIT_MAP[unit_str]; + // type-check multiplicity + switch (date_unit) { + case t_date_bucket_unit::SECONDS: + if (!(multiplicity == 1 || multiplicity == 5 + || multiplicity == 10 || multiplicity == 15 + || multiplicity == 20 || multiplicity == 30)) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::MINUTES: + if (!(multiplicity == 1 || multiplicity == 5 + || multiplicity == 10 || multiplicity == 15 + || multiplicity == 20 || multiplicity == 30)) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::HOURS: + if (!(multiplicity == 1 || multiplicity == 2 + || multiplicity == 3 || multiplicity == 4 + || multiplicity == 6 || multiplicity == 12)) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::DAYS: + // TODO: day multiplicity. + if (multiplicity > 31) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::WEEKS: + // TODO: week multiplicity + if (multiplicity > 4) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::MONTHS: + if (!(multiplicity == 1 || multiplicity == 2 + || multiplicity == 3 || multiplicity == 4 + || multiplicity == 6)) { + rval.m_status = STATUS_CLEAR; + return rval; + } + break; + case t_date_bucket_unit::YEARS: + break; + default: + PSP_COMPLAIN_AND_ABORT("[bucket] invalid date bucket unit!"); + break; + } t_dtype val_dtype = val.get_dtype(); - // type-check if (!(val_dtype == DTYPE_DATE || val_dtype == DTYPE_TIME)) { rval.m_status = STATUS_CLEAR; @@ -1196,7 +1271,7 @@ namespace computed_function { // Depending on unit, datetime columns can result in a date column or a // datetime column. if (val_dtype == DTYPE_TIME) { - switch (date_bucket_unit) { + switch (date_unit) { case t_date_bucket_unit::SECONDS: case t_date_bucket_unit::MINUTES: case t_date_bucket_unit::HOURS: { @@ -1222,15 +1297,15 @@ namespace computed_function { return rval; } - switch (date_bucket_unit) { + switch (date_unit) { case t_date_bucket_unit::SECONDS: { - _second_bucket(val, rval); + _second_bucket(val, rval, multiplicity); } break; case t_date_bucket_unit::MINUTES: { - _minute_bucket(val, rval); + _minute_bucket(val, rval, multiplicity); } break; case t_date_bucket_unit::HOURS: { - _hour_bucket(val, rval); + _hour_bucket(val, rval, multiplicity); } break; case t_date_bucket_unit::DAYS: { _day_bucket(val, rval); @@ -1239,10 +1314,10 @@ namespace computed_function { _week_bucket(val, rval); } break; case t_date_bucket_unit::MONTHS: { - _month_bucket(val, rval); + _month_bucket(val, rval, multiplicity); } break; case t_date_bucket_unit::YEARS: { - _year_bucket(val, rval); + _year_bucket(val, rval, multiplicity); } break; default: { PSP_COMPLAIN_AND_ABORT("[bucket] invalid date bucket unit!"); @@ -1252,14 +1327,29 @@ namespace computed_function { return rval; } + /// @brief Buckets a given time into a date at multiplicity*T resolution. + /// @tparam T The std::chrono::duration to bucket by. + /// @param val The input date. + /// @param multiplicity How many Ts to put in a bucket. + /// @return The bucketed time. + template + t_time + bucket_time(t_tscalar& val, t_uindex multiplicity) { + std::chrono::milliseconds millis(val.to_int64()); + auto raw = std::chrono::duration_cast(millis).count(); + int64_t bucket + = floor(static_cast(raw) / multiplicity) * multiplicity; + T refined(bucket); + return t_time( + std::chrono::duration_cast(refined) + .count()); + } + void - _second_bucket(t_tscalar& val, t_tscalar& rval) { + _second_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity) { switch (val.get_dtype()) { case DTYPE_TIME: { - auto int_ts = val.to_int64(); - std::int64_t bucketed_ts - = floor(static_cast(int_ts) / 1000) * 1000; - rval.set(t_time(bucketed_ts)); + rval.set(bucket_time(val, multiplicity)); } break; default: { // echo the original value back into the column. @@ -1269,22 +1359,10 @@ namespace computed_function { } void - _minute_bucket(t_tscalar& val, t_tscalar& rval) { + _minute_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity) { switch (val.get_dtype()) { case DTYPE_TIME: { - // Convert the int64 to a milliseconds duration timestamp - std::chrono::milliseconds ms_timestamp(val.to_int64()); - - // Convert milliseconds to minutes - std::chrono::minutes m_timestamp - = std::chrono::duration_cast( - ms_timestamp); - - // Set a new `t_time` and return it. - rval.set(t_time( - std::chrono::duration_cast( - m_timestamp) - .count())); + rval.set(bucket_time(val, multiplicity)); } break; default: { rval.set(val); @@ -1293,22 +1371,10 @@ namespace computed_function { } void - _hour_bucket(t_tscalar& val, t_tscalar& rval) { + _hour_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity) { switch (val.get_dtype()) { case DTYPE_TIME: { - // Convert the int64 to a millisecond duration timestamp - std::chrono::milliseconds ms_timestamp(val.to_int64()); - - // Convert the milliseconds to hours - std::chrono::hours hr_timestamp - = std::chrono::duration_cast( - ms_timestamp); - - // Set a new `t_time` and return it. - rval.set(t_time( - std::chrono::duration_cast( - hr_timestamp) - .count())); + rval.set(bucket_time(val, multiplicity)); } break; default: { rval.set(val); @@ -1344,6 +1410,7 @@ namespace computed_function { rval.set(t_date(year, month, day)); } break; + case DTYPE_DATE: default: { // echo the original value back into the column. rval.set(val); @@ -1434,14 +1501,19 @@ namespace computed_function { } void - _month_bucket(t_tscalar& val, t_tscalar& rval) { + _month_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity) { switch (val.get_dtype()) { case DTYPE_DATE: { t_date date_val = val.get(); - rval.set(t_date(date_val.year(), date_val.month(), 1)); + auto in_month = date_val.month(); + int8_t out_month + = floor(static_cast(in_month) / multiplicity) + * multiplicity; + rval.set(t_date(date_val.year(), out_month, 1)); } break; case DTYPE_TIME: { - // Convert the int64 to a milliseconds duration timestamp + // Convert the int64 to a milliseconds duration + // timestamp std::chrono::milliseconds ms_timestamp(val.to_int64()); // Convert the timestamp to a `sys_time` (alias for @@ -1456,6 +1528,10 @@ namespace computed_function { std::int32_t year = static_cast(t->tm_year + 1900); std::int32_t month = static_cast(t->tm_mon); + if (multiplicity != 1) { + month = floor(static_cast(month) / multiplicity) + * multiplicity; + } rval.set(t_date(year, month, 1)); } break; default: @@ -1464,11 +1540,14 @@ namespace computed_function { } void - _year_bucket(t_tscalar& val, t_tscalar& rval) { + _year_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity) { switch (val.get_dtype()) { case DTYPE_DATE: { t_date date_val = val.get(); - rval.set(t_date(date_val.year(), 0, 1)); + rval.set(t_date( + floor(static_cast(date_val.year()) / multiplicity) + * multiplicity, + 0, 1)); } break; case DTYPE_TIME: { // Convert the int64 to a milliseconds duration timestamp @@ -1485,6 +1564,10 @@ namespace computed_function { // Use the `tm` to create the `t_date` std::int32_t year = static_cast(t->tm_year + 1900); + if (multiplicity != 1) { + year = floor(static_cast(year) / multiplicity) + * multiplicity; + } rval.set(t_date(year, 0, 1)); } break; default: diff --git a/cpp/perspective/src/include/perspective/computed_function.h b/cpp/perspective/src/include/perspective/computed_function.h index 9ec51a6021..3b7d9c505b 100644 --- a/cpp/perspective/src/include/perspective/computed_function.h +++ b/cpp/perspective/src/include/perspective/computed_function.h @@ -227,16 +227,16 @@ namespace computed_function { // faster unit lookups, since we are calling this lookup in a tight // loop. - static tsl::hopscotch_map UNIT_MAP; + static tsl::hopscotch_map UNIT_MAP; }; - void _second_bucket(t_tscalar& val, t_tscalar& rval); - void _minute_bucket(t_tscalar& val, t_tscalar& rval); - void _hour_bucket(t_tscalar& val, t_tscalar& rval); + void _second_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity); + void _minute_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity); + void _hour_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity); void _day_bucket(t_tscalar& val, t_tscalar& rval); void _week_bucket(t_tscalar& val, t_tscalar& rval); - void _month_bucket(t_tscalar& val, t_tscalar& rval); - void _year_bucket(t_tscalar& val, t_tscalar& rval); + void _month_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity); + void _year_bucket(t_tscalar& val, t_tscalar& rval, t_uindex multiplicity); /** * @brief Returns the current datetime. Will be recalculated on view diff --git a/packages/perspective/test/js/expressions/common.js b/packages/perspective/test/js/expressions/common.js index db3cec1921..8047ed9453 100644 --- a/packages/perspective/test/js/expressions/common.js +++ b/packages/perspective/test/js/expressions/common.js @@ -76,22 +76,36 @@ exports.months_of_year = [ "12 December", ]; -exports.second_bucket = function (val) { - return new Date(Math.floor(new Date(val).getTime() / 1000) * 1000); +exports.second_bucket = function (val, multiplicity) { + if (multiplicity === undefined) { + multiplicity = 1; + } + const mult = 1000 * multiplicity; + return new Date(Math.floor(new Date(val).getTime() / mult) * mult); }; -exports.minute_bucket = function (val) { +exports.minute_bucket = function (val, multiplicity) { + if (multiplicity === undefined) { + multiplicity = 1; + } let date = new Date(val); date.setSeconds(0); date.setMilliseconds(0); + date.setMinutes( + Math.floor(date.getMinutes() / multiplicity) * multiplicity + ); return date; }; -exports.hour_bucket = function (val) { +exports.hour_bucket = function (val, multiplicity) { + if (multiplicity === undefined) { + multiplicity = 1; + } let date = new Date(val); date.setMinutes(0); date.setSeconds(0); date.setMilliseconds(0); + date.setHours(Math.floor(date.getHours() / multiplicity) * multiplicity); return date; }; @@ -115,21 +129,31 @@ exports.week_bucket = function (val) { return date; }; -exports.month_bucket = function (val) { +exports.month_bucket = function (val, multiplicity) { + if (multiplicity === undefined) { + multiplicity = 1; + } let date = new Date(val); date.setHours(0); date.setMinutes(0); date.setSeconds(0); date.setDate(1); + date.setMonth(Math.floor(date.getMonth() / multiplicity) * multiplicity); return date; }; -exports.year_bucket = function (val) { +exports.year_bucket = function (val, multiplicity) { + if (multiplicity === undefined) { + multiplicity = 1; + } let date = new Date(val); date.setHours(0); date.setMinutes(0); date.setSeconds(0); date.setDate(1); date.setMonth(0); + date.setFullYear( + Math.floor(date.getFullYear() / multiplicity) * multiplicity + ); return date; }; diff --git a/packages/perspective/test/js/expressions/datetime.spec.js b/packages/perspective/test/js/expressions/datetime.spec.js index 893b2c23d9..72163a4dca 100644 --- a/packages/perspective/test/js/expressions/datetime.spec.js +++ b/packages/perspective/test/js/expressions/datetime.spec.js @@ -719,6 +719,42 @@ const perspective = require("@finos/perspective"); table.delete(); }); + test("Bucket (M), date with multiplicity", async () => { + const table = await perspective.table({ + a: "date", + }); + + const col_name = "bucket(\"a\", '3M')"; + + const view = await table.view({ + expressions: [col_name], + }); + + table.update({ + a: [ + new Date(2020, 0, 12), + new Date(2020, 1, 15), + new Date(2020, 2, 17), + new Date(2020, 3, 18), + new Date(2020, 4, 29), + new Date(2020, 5, 6), + new Date(2020, 6, 10), + new Date(2020, 7, 30), + new Date(2020, 8, 22), + new Date(2020, 9, 7), + new Date(2020, 10, 1), + ], + }); + + let result = await view.to_columns(); + + expect( + result[col_name].map((x) => (x ? new Date(x) : null)) + ).toEqual(result.a.map((x) => common.month_bucket(x, 3))); + view.delete(); + table.delete(); + }); + test("Bucket (Y), date", async function () { const table = await perspective.table({ a: "date", @@ -778,6 +814,42 @@ const perspective = require("@finos/perspective"); view.delete(); table.delete(); }); + + test("Bucket (Y), date with multiplicity", async () => { + const table = await perspective.table({ + a: "date", + }); + + const col_name = "bucket(\"a\", '7Y')"; + + const view = await table.view({ + expressions: [col_name], + }); + + table.update({ + a: [ + new Date(2010, 0, 12), + new Date(2011, 1, 15), + new Date(2012, 2, 17), + new Date(2013, 3, 18), + new Date(2014, 4, 29), + new Date(2015, 5, 6), + new Date(2016, 6, 10), + new Date(2017, 7, 30), + new Date(2018, 8, 22), + new Date(2019, 9, 7), + new Date(2020, 10, 1), + ], + }); + + let result = await view.to_columns(); + + expect( + result[col_name].map((x) => (x ? new Date(x) : null)) + ).toEqual(result.a.map((x) => common.year_bucket(x, 7))); + view.delete(); + table.delete(); + }); }); test.describe("Datetime, Arity 1 computed", function () { @@ -999,6 +1071,37 @@ const perspective = require("@finos/perspective"); table.delete(); }); + test("Bucket (s), datetime with multiplicity", async function () { + const table = await perspective.table({ + a: "datetime", + }); + + const col_name = `bucket("a", '20s')`; + + const view = await table.view({ + expressions: [col_name], + }); + + table.update({ + a: [ + new Date(2020, 0, 15, 1, 30, 5), + new Date(2020, 3, 29, 1, 30, 10), + new Date(2020, 4, 30, 1, 30, 19), + new Date(2020, 4, 30, 1, 30, 30), + new Date(2020, 4, 30, 1, 30, 50), + ], + }); + + let result = await view.to_columns(); + expect( + result[col_name].map((x) => (x ? new Date(x) : null)) + ).toEqual( + result.a.map((x) => (x ? common.second_bucket(x, 20) : null)) + ); + view.delete(); + table.delete(); + }); + test("Bucket (m), datetime", async function () { const table = await perspective.table({ a: "datetime", @@ -1055,6 +1158,36 @@ const perspective = require("@finos/perspective"); table.delete(); }); + test("Bucket (m), datetime with multiplicity", async function () { + const table = await perspective.table({ + a: "datetime", + }); + + const col_name = `bucket("a", '15m')`; + + const view = await table.view({ + expressions: [col_name], + }); + + table.update({ + a: [ + new Date(2020, 0, 15, 1, 0, 0), + new Date(2020, 0, 15, 1, 6, 0), + new Date(2020, 0, 15, 1, 15, 0), + new Date(2020, 0, 15, 1, 29, 0), + new Date(2020, 0, 15, 1, 30, 0), + new Date(2020, 0, 15, 1, 59, 0), + ], + }); + + let result = await view.to_columns(); + expect( + result[col_name].map((x) => (x ? new Date(x) : null)) + ).toEqual(result.a.map((x) => common.minute_bucket(x, 15))); + view.delete(); + table.delete(); + }); + test("Bucket (h), datetime", async function () { const table = await perspective.table({ a: "datetime", @@ -1109,6 +1242,37 @@ const perspective = require("@finos/perspective"); table.delete(); }); + test("Bucket (h), datetime with multiplicity", async function () { + const table = await perspective.table({ + a: "datetime", + }); + + const col_name = `bucket("a", '6h')`; + + const view = await table.view({ + expressions: [col_name], + }); + + table.update({ + a: [ + new Date(2020, 0, 15, 0, 30, 15), + new Date(2020, 0, 15, 5, 30, 15), + new Date(2020, 0, 15, 6, 30, 15), + new Date(2020, 0, 15, 9, 30, 15), + new Date(2020, 0, 15, 15, 30, 15), + new Date(2020, 0, 15, 20, 30, 15), + new Date(2020, 0, 15, 23, 30, 15), + ], + }); + + let result = await view.to_columns(); + expect( + result[col_name].map((x) => (x ? new Date(x) : null)) + ).toEqual(result.a.map((x) => common.hour_bucket(x, 6))); + view.delete(); + table.delete(); + }); + test("Bucket (D), datetime", async function () { const table = await perspective.table({ a: "datetime",