Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse aggregates in column order #1432

Merged
merged 3 commits into from
May 29, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
iterate through aggregates using column order
  • Loading branch information
sc1f authored and texodus committed May 29, 2021
commit 9db07a1bb7897aa73e78f841192f595bec8728bc
106 changes: 54 additions & 52 deletions cpp/perspective/src/cpp/view_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,63 +200,32 @@ t_view_config::get_column_pivot_depth() const {
// PRIVATE
void
t_view_config::fill_aggspecs(std::shared_ptr<t_schema> schema) {
/*
* Provide aggregates for columns that are shown but NOT specified in
* `m_aggregates`, including expressions that are in the `columns`
* array but not the `aggregates` map.
*/
auto max_agg_count = m_columns.size() + m_sort.size();
m_aggspecs.reserve(max_agg_count);
m_aggregate_names.reserve(max_agg_count);

// Iterate through the columns array - if the column is in the
// aggregates map, use the specified aggregate or generate a default. If
// an aggregate is in the aggregate map but NOT in the columns list,
// it is ignored and NOT applied.
for (const std::string& column : m_columns) {
if (m_aggregates.count(column) != 0) {
continue;
}

t_dtype dtype = schema->get_dtype(column);
std::vector<t_dep> dependencies{t_dep(column, DEPTYPE_COLUMN)};
t_aggtype agg_type
= t_aggtype::AGGTYPE_ANY; // use aggtype here since we are not parsing aggs

if (!m_column_only) {
agg_type = _get_default_aggregate(dtype);
}

// create aggregate specification, and memoize the column name
m_aggspecs.push_back(t_aggspec(column, agg_type, dependencies));
m_aggregate_names.push_back(column);
}

/**
* Construct aggspecs for aggregates explicitly specified in `m_aggregates`.
*/
for (auto const& iter : m_aggregates) {
auto column = iter.first;
auto aggregate = iter.second;
if (std::find(m_columns.begin(), m_columns.end(), column) == m_columns.end()) {
continue;
}

std::vector<t_dep> dependencies{t_dep(column, DEPTYPE_COLUMN)};
t_aggtype agg_type;

if (m_column_only) {
agg_type = t_aggtype::AGGTYPE_ANY;
} else {
if (aggregate.at(0) == "weighted mean") {
dependencies.push_back(t_dep(aggregate.at(1), DEPTYPE_COLUMN));
agg_type = AGGTYPE_WEIGHTED_MEAN;
} else {
agg_type = str_to_aggtype(aggregate.at(0));
}
}

if (agg_type == AGGTYPE_FIRST || agg_type == AGGTYPE_LAST_BY_INDEX) {
dependencies.push_back(t_dep("psp_okey", DEPTYPE_COLUMN));
m_aggspecs.push_back(
t_aggspec(column, column, agg_type, dependencies, SORTTYPE_ASCENDING));
const auto& agg_iter = m_aggregates.find(column);

if (agg_iter != m_aggregates.end()) {
// If the column name is in the aggregate map, use the custom
// aggregate as defined by the map and add it to m_aggspecs
// and m_aggregate_names.
const std::vector<std::string>& aggregate = agg_iter->second;
make_aggspec(column, aggregate, dtype);
} else {
// Generate a default aggregate based on the column type.
std::vector<t_dep> dependencies{t_dep(column, DEPTYPE_COLUMN)};
t_aggtype agg_type;
m_column_only ? agg_type = AGGTYPE_ANY : agg_type = _get_default_aggregate(dtype);
m_aggspecs.push_back(t_aggspec(column, agg_type, dependencies));
m_aggregate_names.push_back(column);
}

m_aggregate_names.push_back(column);
}

// construct aggspecs for hidden sorts
Expand Down Expand Up @@ -344,4 +313,37 @@ t_view_config::get_aggregate_index(const std::string& column) const {
return t_index();
}

void
t_view_config::make_aggspec(
const std::string& column, const std::vector<std::string>& aggregate, t_dtype dtype) {
t_aggtype agg_type;
t_aggspec aggspec;

// Maximum of 2 dependencies, based on the aggregate type
std::vector<t_dep> dependencies{t_dep(column, DEPTYPE_COLUMN)};
dependencies.reserve(2);

if (m_column_only) {
agg_type = t_aggtype::AGGTYPE_ANY;
} else {
if (aggregate.at(0) == "weighted mean") {
dependencies.push_back(t_dep(aggregate.at(1), DEPTYPE_COLUMN));
agg_type = AGGTYPE_WEIGHTED_MEAN;
} else {
agg_type = str_to_aggtype(aggregate.at(0));
}
}

if (agg_type == AGGTYPE_FIRST || agg_type == AGGTYPE_LAST_BY_INDEX) {
dependencies.push_back(t_dep("psp_okey", DEPTYPE_COLUMN));
aggspec = t_aggspec(
column, column, agg_type, dependencies, SORTTYPE_ASCENDING);
} else {
aggspec = t_aggspec(column, agg_type, dependencies);
}

m_aggspecs.push_back(aggspec);
m_aggregate_names.push_back(column);
}

} // end namespace perspective
3 changes: 3 additions & 0 deletions cpp/perspective/src/include/perspective/view_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <perspective/scalar.h>
#include <perspective/computed_expression.h>
#include <tsl/ordered_map.h>
#include <tsl/hopscotch_set.h>
#include <unordered_set>
#include <tuple>

Expand Down Expand Up @@ -154,6 +155,8 @@ class PERSPECTIVE_EXPORT t_view_config {
*/
t_index get_aggregate_index(const std::string& column) const;

/**
 * @brief Create the aggregate specification for `column` from an explicit
 * aggregate definition, appending it to `m_aggspecs` and appending the
 * column name to `m_aggregate_names`.
 *
 * @param column the column to aggregate
 * @param aggregate the aggregate definition; the first element is the
 * aggregate name, and "weighted mean" reads a second element as an
 * additional dependency column
 * @param dtype the column's dtype
 */
void make_aggspec(const std::string& column, const std::vector<std::string>& aggregate, t_dtype dtype);

// containers for primitive data that does not need transformation into abstractions
std::vector<std::string> m_row_pivots;
std::vector<std::string> m_column_pivots;
Expand Down
93 changes: 93 additions & 0 deletions packages/perspective/test/js/expressions/functionality.js
Original file line number Diff line number Diff line change
Expand Up @@ -1480,6 +1480,99 @@ module.exports = perspective => {
table.delete();
});

it("Row-pivoted expression columns return correct column_paths()", async function() {
    // The expression's output column is asserted below under the name "column".
    const EXPRESSION = '// column\n"w" + "x"';
    const table = await perspective.table(expressions_common.int_float_data);

    // Without an explicit `columns` list, check the default path order.
    const default_view = await table.view({
        row_pivots: ["y"],
        expressions: [EXPRESSION]
    });
    const default_paths = await default_view.column_paths();
    expect(default_paths).toEqual(["__ROW_PATH__", "w", "x", "y", "z", "column"]);
    await default_view.delete();

    // Each explicit column order must come back unchanged, prefixed by
    // __ROW_PATH__, first when pivoting on a table column and then when
    // pivoting on the expression column itself.
    const column_orders = [["x", "column"], ["column"], ["x", "column", "y"]];

    for (const pivot of ["y", "column"]) {
        for (const columns of column_orders) {
            const expected_paths = ["__ROW_PATH__", ...columns];
            const view = await table.view({
                row_pivots: [pivot],
                expressions: [EXPRESSION],
                columns: columns
            });
            const paths = await view.column_paths();
            expect(paths).toEqual(expected_paths);
            view.delete();
        }
    }

    table.delete();
});

it("Row-pivoted numeric expression columns return correct column_paths()", async function() {
    const table = await perspective.table(expressions_common.int_float_data);

    // The expression "1234" yields a column whose name parses as a number;
    // it is given an explicit aggregate alongside regular columns.
    const config = {
        row_pivots: ["y"],
        expressions: ["1234"],
        aggregates: {
            x: "sum",
            y: "count",
            "1234": "sum"
        }
    };

    // default order
    let view = await table.view(config);
    let paths = await view.column_paths();
    expect(paths).toEqual(["__ROW_PATH__", "w", "x", "y", "z", "1234"]);
    await view.delete();

    const expected_paths = [["x", "1234"], ["1234"], ["x", "1234", "y"]];

    // Check every explicit column order twice: once pivoting on a table
    // column and once pivoting on the numeric expression column itself,
    // mirroring the non-numeric expression test. The second pass previously
    // repeated the first pass verbatim and so exercised nothing new.
    for (const row_pivots of [["y"], ["1234"]]) {
        for (const expected of expected_paths) {
            const output = ["__ROW_PATH__", ...expected];
            view = await table.view({
                ...config,
                row_pivots: row_pivots,
                columns: expected
            });
            paths = await view.column_paths();
            expect(paths).toEqual(output);
            // Wait for the view to be released before creating the next one.
            await view.delete();
        }
    }

    table.delete();
});

it("Should be able to column pivot on an expression column.", async function() {
const table = await perspective.table(expressions_common.int_float_data);
const view = await table.view({
Expand Down
72 changes: 72 additions & 0 deletions packages/perspective/test/js/pivots.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,51 @@ module.exports = perspective => {
table.delete();
});

it("Aggregates are processed in the order of the columns array", async function() {
    const table = await perspective.table(data);

    // The aggregates map deliberately lists `z` before `y`, the reverse of
    // the `columns` array; the output must follow `columns`.
    const view = await table.view({
        row_pivots: ["z"],
        columns: ["y", "z"],
        aggregates: {
            z: "last",
            y: "last"
        }
    });

    const column_paths = await view.column_paths();
    expect(column_paths).toEqual(["__ROW_PATH__", "y", "z"]);

    const expected_rows = [
        {__ROW_PATH__: [], y: "c", z: true},
        {__ROW_PATH__: [false], y: "d", z: false},
        {__ROW_PATH__: [true], y: "c", z: true}
    ];
    const rows = await view.to_json();
    expect(rows).toEqual(expected_rows);

    view.delete();
    table.delete();
});

it("Aggregates not in columns are ignored", async function() {
    const table = await perspective.table(data);

    // `x` has an aggregate but is not listed in `columns`, so it must not
    // appear in the output; `y` and `z` have no explicit aggregate.
    const view = await table.view({
        row_pivots: ["z"],
        columns: ["y", "z"],
        aggregates: {
            x: "count"
        }
    });
    const paths = await view.column_paths();
    expect(paths).toEqual(["__ROW_PATH__", "y", "z"]);
    const answer = [
        {__ROW_PATH__: [], y: 4, z: 4},
        {__ROW_PATH__: [false], y: 2, z: 2},
        {__ROW_PATH__: [true], y: 2, z: 2}
    ];
    const result = await view.to_json();
    expect(result).toEqual(answer);
    view.delete();
    table.delete();
});

it("['z'], sum", async function() {
var table = await perspective.table(data);
var view = await table.view({
Expand Down Expand Up @@ -1290,6 +1335,33 @@ module.exports = perspective => {
table.delete();
});

it("Should return numerical column names in the correct order, 1-sided view", async function() {
    const table = await perspective.table({
        "2345": [0, 1, 2, 3],
        "1.23456789": [0, 1, 2, 3],
        "1234": [1, 2, 3, 4],
        x: [5, 6, 7, 8]
    });

    // Regression guard: the aggregates map used to be walked in
    // Object.keys() order, which moves number-like keys to the front of
    // the map. The engine must instead honour the order of `columns`
    // for every column name, numeric-looking or not.
    const requested_columns = ["2345", "1234", "x", "1.23456789"];
    const view = await table.view({
        row_pivots: ["x"],
        columns: requested_columns,
        aggregates: {
            x: "sum",
            "1234": "sum"
        }
    });

    const column_paths = await view.column_paths();
    expect(column_paths).toEqual(["__ROW_PATH__", "2345", "1234", "x", "1.23456789"]);

    view.delete();
    table.delete();
});

it("Should return all columns in specified order, 1-sided view", async function() {
const table = await perspective.table(data);
const view = await table.view({
Expand Down
59 changes: 59 additions & 0 deletions packages/perspective/test/js/sort.js
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,65 @@ module.exports = perspective => {
table.delete();
});

it("column pivot and hidden sort ['y'] with aggregates specified", async function() {
    const table = await perspective.table({
        x: [1, 2, 3, 4],
        y: ["a", "a", "a", "b"]
    });

    // `y` is sorted on but not shown. In a column-only view the aggregate
    // given for a hidden sort column should be ignored; verify the output
    // is unaffected by it.
    const view_config = {
        columns: ["x"],
        column_pivots: ["y"],
        sort: [["y", "desc"]],
        aggregates: {
            y: "count"
        }
    };
    const view = await table.view(view_config);

    // A regular (non-"col") sort must leave the column path order alone.
    const column_paths = await view.column_paths();
    expect(column_paths).toEqual(["a|x", "b|x"]);

    const columns = await view.to_columns();
    expect(columns).toEqual({
        "a|x": [null, 1, 2, 3],
        "b|x": [4, null, null, null]
    });

    view.delete();
    table.delete();
});

it("column pivot and hidden col sort ['y'] with aggregates specified", async function() {
    const table = await perspective.table({
        x: [1, 2, 3, 4],
        y: ["a", "a", "a", "b"]
    });

    // `y` is sorted on but not shown. In a column-only view the aggregate
    // given for a hidden sort column should be ignored; verify the output
    // is unaffected by it.
    const view_config = {
        columns: ["x"],
        column_pivots: ["y"],
        sort: [["y", "col desc"]],
        aggregates: {
            y: "count"
        }
    };
    const view = await table.view(view_config);

    // A "col desc" sort is expected to reverse the column path order.
    const column_paths = await view.column_paths();
    expect(column_paths).toEqual(["b|x", "a|x"]);

    const columns = await view.to_columns();
    expect(columns).toEqual({
        "b|x": [null, null, null, 4],
        "a|x": [1, 2, 3, null]
    });

    view.delete();
    table.delete();
});

it("column pivot ['y'] with overridden aggregates", async function() {
const table = await perspective.table({
x: [1, 2, 3, 4],
Expand Down
Loading