Skip to content

Commit

Permalink
Merge pull request #1543 from finos/weighted-mean-2
Browse files Browse the repository at this point in the history
Fix 'weighted mean' aggregate support in <perspective-viewer>
  • Loading branch information
texodus authored Sep 21, 2021
2 parents 13256dc + 84da7b6 commit 5a6d0cf
Show file tree
Hide file tree
Showing 221 changed files with 8,292 additions and 6,779 deletions.
1 change: 0 additions & 1 deletion cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,6 @@ if (PSP_WASM_BUILD)
-s MODULARIZE=1 \
-s EXPORT_NAME=\"load_perspective\" \
-s MAXIMUM_MEMORY=4gb \
-s WASM_BIGINT=1 \
-s USE_ES6_IMPORT_META=0 \
-s EXPORTED_FUNCTIONS=\"['_main']\" \
")
Expand Down
246 changes: 142 additions & 104 deletions cpp/perspective/src/cpp/aggregate.cpp

Large diffs are not rendered by default.

35 changes: 21 additions & 14 deletions cpp/perspective/src/cpp/aggspec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ t_col_name_type::t_col_name_type(const std::string& name, t_dtype type)

t_aggspec::t_aggspec() {}

t_aggspec::t_aggspec(
const std::string& name, t_aggtype agg, const std::vector<t_dep>& dependencies)
t_aggspec::t_aggspec(const std::string& name, t_aggtype agg,
const std::vector<t_dep>& dependencies)
: m_name(name)
, m_disp_name(name)
, m_agg(agg)
, m_dependencies(dependencies) {}

t_aggspec::t_aggspec(const std::string& aggname, t_aggtype agg, const std::string& dep)
t_aggspec::t_aggspec(
const std::string& aggname, t_aggtype agg, const std::string& dep)
: m_name(aggname)
, m_disp_name(aggname)
, m_agg(agg)
Expand All @@ -40,23 +41,24 @@ t_aggspec::t_aggspec(t_aggtype agg, const std::string& dep)
: m_agg(agg)
, m_dependencies(std::vector<t_dep>{t_dep(dep, DEPTYPE_COLUMN)}) {}

t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name, t_aggtype agg,
const std::vector<t_dep>& dependencies)
t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name,
t_aggtype agg, const std::vector<t_dep>& dependencies)
: m_name(name)
, m_disp_name(disp_name)
, m_agg(agg)
, m_dependencies(dependencies) {}

t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name, t_aggtype agg,
const std::vector<t_dep>& dependencies, t_sorttype sort_type)
t_aggspec::t_aggspec(const std::string& name, const std::string& disp_name,
t_aggtype agg, const std::vector<t_dep>& dependencies, t_sorttype sort_type)
: m_name(name)
, m_disp_name(disp_name)
, m_agg(agg)
, m_dependencies(dependencies)
, m_sort_type(sort_type) {}

t_aggspec::t_aggspec(const std::string& aggname, const std::string& disp_aggname, t_aggtype agg,
t_uindex agg_one_idx, t_uindex agg_two_idx, double agg_one_weight, double agg_two_weight)
t_aggspec::t_aggspec(const std::string& aggname,
const std::string& disp_aggname, t_aggtype agg, t_uindex agg_one_idx,
t_uindex agg_two_idx, double agg_one_weight, double agg_two_weight)
: m_name(aggname)
, m_disp_name(disp_aggname)
, m_agg(agg)
Expand Down Expand Up @@ -232,7 +234,9 @@ get_simple_accumulator_type(t_dtype coltype) {
return DTYPE_FLOAT64;
}

default: { PSP_COMPLAIN_AND_ABORT("Unexpected coltype"); }
default: {
PSP_COMPLAIN_AND_ABORT("Unexpected coltype");
}
}
return DTYPE_NONE;
}
Expand Down Expand Up @@ -271,7 +275,7 @@ std::vector<std::string>
t_aggspec::get_input_depnames() const {
std::vector<std::string> rval;
rval.reserve(m_dependencies.size());
for (const auto & d : m_dependencies) {
for (const auto& d : m_dependencies) {
rval.push_back(d.name());
}
return rval;
Expand All @@ -281,7 +285,7 @@ std::vector<std::string>
t_aggspec::get_output_depnames() const {
std::vector<std::string> rval;
rval.reserve(m_dependencies.size());
for (const auto & d: m_dependencies) {
for (const auto& d : m_dependencies) {
rval.push_back(d.name());
}
return rval;
Expand All @@ -298,7 +302,8 @@ t_aggspec::get_output_specs(const t_schema& schema) const {
case AGGTYPE_MUL:
case AGGTYPE_SUM_NOT_NULL: {
t_dtype coltype = schema.get_dtype(m_dependencies[0].name());
return mk_col_name_type_vec(name(), get_simple_accumulator_type(coltype));
return mk_col_name_type_vec(
name(), get_simple_accumulator_type(coltype));
}
case AGGTYPE_ANY:
case AGGTYPE_UNIQUE:
Expand Down Expand Up @@ -352,7 +357,9 @@ t_aggspec::get_output_specs(const t_schema& schema) const {
case AGGTYPE_DISTINCT_COUNT: {
return mk_col_name_type_vec(name(), DTYPE_UINT32);
}
default: { PSP_COMPLAIN_AND_ABORT("Unknown agg type"); }
default: {
PSP_COMPLAIN_AND_ABORT("Unknown agg type");
}
}

return std::vector<t_col_name_type>();
Expand Down
4 changes: 2 additions & 2 deletions cpp/perspective/src/cpp/arg_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ t_argsort_comparator::operator()(t_index a, t_index b) const {
}

void
simple_argsort(
std::vector<t_tscalar>& v, std::vector<t_index>& output, const t_sorttype& sort_type) {
simple_argsort(std::vector<t_tscalar>& v, std::vector<t_index>& output,
const t_sorttype& sort_type) {
// Output should be the same size is v
for (t_index i = 0, loop_end = output.size(); i != loop_end; ++i)
output[i] = i;
Expand Down
52 changes: 32 additions & 20 deletions cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
#include <arrow/io/memory.h>

#ifdef PSP_ENABLE_WASM
// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>
// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>
#else
#include <arrow/csv/reader.h>
#include <arrow/csv/reader.h>
#endif

namespace perspective {
Expand All @@ -30,7 +30,8 @@ namespace apachearrow {
int64_t* out) const override {
size_t endptr;
std::string val(s, s + length);
int64_t value = std::stoll(static_cast<std::string>(val), &endptr, 10);
int64_t value
= std::stoll(static_cast<std::string>(val), &endptr, 10);
if (endptr != length) {
return false;
} else {
Expand All @@ -51,7 +52,8 @@ namespace apachearrow {
if (ARROW_PREDICT_FALSE(s[0] != '.')) {
return false;
}
if (ARROW_PREDICT_FALSE(!arrow::internal::ParseUnsigned(s + 1, 3, &millis))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::ParseUnsigned(s + 1, 3, &millis))) {
return false;
}

Expand All @@ -66,10 +68,12 @@ namespace apachearrow {
ParseTZ(const char* s, std::chrono::hours* out) {
uint8_t hours = 0;

if (ARROW_PREDICT_FALSE(s[0] != '+') && ARROW_PREDICT_FALSE(s[0] != '-')) {
if (ARROW_PREDICT_FALSE(s[0] != '+')
&& ARROW_PREDICT_FALSE(s[0] != '-')) {
return false;
}
if (ARROW_PREDICT_FALSE(!arrow::internal::ParseUnsigned(s + 1, 2, &hours))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::ParseUnsigned(s + 1, 2, &hours))) {
return false;
}

Expand Down Expand Up @@ -97,12 +101,14 @@ namespace apachearrow {
// "YYYY-MM-DD[ T]hh:mm:ss.sss"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(s, &ymd))) {
!arrow::internal::detail::ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
std::chrono::seconds seconds;
if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
return false;
}
std::chrono::milliseconds millis;
Expand All @@ -111,18 +117,21 @@ namespace apachearrow {
}

*out = arrow::internal::detail::ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + seconds + millis, unit);
arrow_vendored::date::sys_days(ymd) + seconds + millis,
unit);
return true;
} else if (length == 25) {
// "2008-09-15[ T]15:53:00+05:00"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(s, &ymd))) {
!arrow::internal::detail::ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
std::chrono::seconds seconds;
if (ARROW_PREDICT_FALSE(!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseHH_MM_SS(
s + 11, &seconds))) {
return false;
}
std::chrono::hours tz;
Expand All @@ -131,7 +140,8 @@ namespace apachearrow {
}

*out = arrow::internal::detail::ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + tz + seconds, unit);
arrow_vendored::date::sys_days(ymd) + tz + seconds,
unit);
return true;
}
return false;
Expand All @@ -148,7 +158,8 @@ namespace apachearrow {
std::vector<std::shared_ptr<arrow::TimestampParser>> DATE_PARSERS{
std::make_shared<CustomISO8601Parser>(),
arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime(
"%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
arrow::TimestampParser::MakeStrptime("%d %m %Y"),
Expand All @@ -159,7 +170,8 @@ namespace apachearrow {
std::make_shared<UnixTimestampParser>(),
std::make_shared<CustomISO8601Parser>(),
arrow::TimestampParser::MakeStrptime("%Y-%m-%d\\D%H:%M:%S.%f"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime(
"%m/%d/%Y, %I:%M:%S %p"), // US locale string
arrow::TimestampParser::MakeStrptime("%m-%d-%Y"),
arrow::TimestampParser::MakeStrptime("%m/%d/%Y"),
arrow::TimestampParser::MakeStrptime("%d %m %Y"),
Expand All @@ -169,8 +181,8 @@ namespace apachearrow {
parseAsArrowTimestamp(const std::string& input) {
for (auto candidate : DATE_PARSERS) {
int64_t datetime;
if (candidate->operator()(
input.c_str(), input.size(), arrow::TimeUnit::MILLI, &datetime)) {
if (candidate->operator()(input.c_str(), input.size(),
arrow::TimeUnit::MILLI, &datetime)) {
return datetime;
}
}
Expand Down
Loading

0 comments on commit 5a6d0cf

Please sign in to comment.