Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix column ordering in Python, null handling for computed columns #907

Merged
merged 4 commits into from
Feb 8, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix #904
  • Loading branch information
sc1f committed Feb 5, 2020
commit f05bd8d6eef8da86c17965248ebd5b1fd823c5a0
18 changes: 10 additions & 8 deletions python/perspective/perspective/src/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,18 @@ make_view_config(const t_schema& schema, t_val date_parser, t_val config) {

// to preserve order, do not cast to std::map - use keys and python 3.7's guarantee that dicts respect insertion order
auto p_aggregates = py::dict(config.attr("get_aggregates")());
auto aggregate_keys = py::list(p_aggregates.attr("keys")());
tsl::ordered_map<std::string, std::vector<std::string>> aggregates;

for (auto& key : aggregate_keys) {
const std::string key_str = key.cast<std::string>();
if (py::isinstance<py::str>(p_aggregates[key])) {
std::vector<std::string> agg{p_aggregates[key].cast<std::string>()};
aggregates[key_str] = agg;
} else {
aggregates[key_str] = p_aggregates[key].cast<std::vector<std::string>>();
for (auto& column : columns) {
py::str py_column_name = py::str(column);
if (p_aggregates.contains(py_column_name)) {
if (py::isinstance<py::str>(p_aggregates[py_column_name])) {
std::vector<std::string> agg{
p_aggregates[py_column_name].cast<std::string>()};
aggregates[column] = agg;
} else {
aggregates[column] = p_aggregates[py_column_name].cast<std::vector<std::string>>();
}
}
};

Expand Down
71 changes: 24 additions & 47 deletions python/perspective/perspective/tests/table/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,40 +262,35 @@ def test_view_dataframe_column_order(self):
assert view.column_paths() == [
"__ROW_PATH__", "-0.1", "-0.05", "0.0", "0.1"]

def test_view_aggregate_order(self):
'''In Python 3.7 and above, a dict's insertion order is guaranteed. We use this guarantee to ensure that
the order of columns shown is the same as the order of keys in a schema/data passed in by the user.

In the Python 2 runtime, order cannot be guaranteed without the usage of OrderedMap in C++.
'''
import six
def test_view_aggregate_order_with_columns(self):
'''If `columns` is provided, order is always guaranteed.'''
data = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
tbl = Table(data)
view = tbl.view(
row_pivots=["a"],
columns=["a", "b", "c", "d"],
aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}
)

order = ["__ROW_PATH__", "d", "c", "b", "a"]
records = view.to_records()
order = ["__ROW_PATH__", "a", "b", "c", "d"]
assert view.column_paths() == order

assert records == [
{"__ROW_PATH__": [], "d": 5.0, "c": 4.0, "b": 4, "a": 3},
{"__ROW_PATH__": ["1"], "d": 4.0, "c": 3.0, "b": 2, "a": 1},
{"__ROW_PATH__": ["3"], "d": 6.0, "c": 5.0, "b": 4, "a": 3}
]
def test_view_df_aggregate_order_with_columns(self):
'''If `columns` is provided, order is always guaranteed.'''
data = pd.DataFrame({
"a": [1, 2, 3],
"b": [2, 3, 4],
"c": [3, 4, 5],
"d": [4, 5, 6]
}, columns=["d", "a", "c", "b"])
tbl = Table(data)
view = tbl.view(
row_pivots=["a"],
aggregates={"d": "avg", "c": "avg", "b": "last", "a": "last"}
)

if six.PY2:
# only test for presence, not order
for record in records:
keys = list(record.keys())
for key in keys:
assert key in order
else:
for record in records:
keys = list(record.keys())
for i in range(len(keys)):
assert keys[i] == order[i]
order = ["__ROW_PATH__", "index", "d", "a", "c", "b"]
assert view.column_paths() == order

def test_view_aggregates_with_no_columns(self):
data = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
Expand All @@ -312,8 +307,8 @@ def test_view_aggregates_with_no_columns(self):
]

def test_view_aggregates_column_order(self):
'''Again, dict insertion order is not guaranteed in Python <3.7.'''
import six
'''Order of columns is entirely determined by the `columns` kwarg. If
it is not provided, the order of columns is undefined.'''
data = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
tbl = Table(data)
view = tbl.view(
Expand All @@ -322,26 +317,8 @@ def test_view_aggregates_column_order(self):
columns=["a", "c"]
)

order = ["__ROW_PATH__", "c", "a"]
records = view.to_records()

assert records == [
{"__ROW_PATH__": [], "c": 4.0, "a": 3},
{"__ROW_PATH__": ["1"], "c": 3.0, "a": 1},
{"__ROW_PATH__": ["3"], "c": 5.0, "a": 3}
]

if six.PY2:
# only test for presence, not order
for record in records:
keys = list(record.keys())
for key in keys:
assert key in order
else:
for record in records:
keys = list(record.keys())
for i in range(len(keys)):
assert keys[i] == order[i]
order = ["__ROW_PATH__", "a", "c"]
assert view.column_paths() == order

def test_view_column_pivot_datetime_names(self):
data = {"a": [datetime(2019, 7, 11, 12, 30)], "b": [1]}
Expand Down