Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix filter values for non-interned strings #2784

Merged
merged 1 commit into from
Oct 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/perspective/src/cpp/data_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ t_data_table::filter_cpp(
break;
}

// TODO we can make this faster by not constructing these on
// every iteration?

const auto& ft = fterms[cidx];
bool tval;

Expand Down
31 changes: 25 additions & 6 deletions cpp/perspective/src/cpp/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1002,12 +1002,12 @@ calculate_num_hidden(const ErasedView& view, const t_view_config& config) {
template <typename A>
static t_tscalar
coerce_to(const t_dtype dtype, const A& val) {
if constexpr (std::is_same_v<A, std::string>) {
if constexpr (std::is_same_v<A, const char*>) {
t_tscalar scalar;
scalar.clear();
switch (dtype) {
case DTYPE_STR:
scalar.set(val.c_str());
scalar.set(val);
return scalar;
case DTYPE_BOOL:
scalar.set(val == "true");
Expand Down Expand Up @@ -1585,6 +1585,8 @@ ProtoServer::_handle_request(std::uint32_t client_id, const Request& req) {
));
}

t_vocab vocab;
vocab.init(false);
std::vector<
std::tuple<std::string, std::string, std::vector<t_tscalar>>>
filter;
Expand Down Expand Up @@ -1617,9 +1619,24 @@ ProtoServer::_handle_request(std::uint32_t client_id, const Request& req) {
"Filter column not in schema: " + f.column()
);
}
a = coerce_to(
schema->get_dtype(f.column()), arg.string()
);

if (!t_tscalar::can_store_inplace(
arg.string().c_str()
)) {

a = coerce_to(
schema->get_dtype(f.column()),
vocab.unintern_c(
vocab.get_interned(arg.string())
)
);
} else {

a = coerce_to(
schema->get_dtype(f.column()),
arg.string().c_str()
);
}
args.push_back(a);
break;
}
Expand Down Expand Up @@ -1715,6 +1732,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, const Request& req) {
LOG_DEBUG("FILTER_OP: " << filter_op);

auto config = std::make_shared<t_view_config>(
vocab,
row_pivots,
column_pivots,
aggregates,
Expand Down Expand Up @@ -1906,7 +1924,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, const Request& req) {
auto* f = proto_filter->Add();
f->set_column(filter.m_colname);
f->set_op(filter_op_to_str(filter.m_op));
auto vals = std::vector<t_tscalar>(filter.m_bag.size() + 1);
auto vals = std::vector<t_tscalar>(filter.m_bag.size());
if (filter.m_op != FILTER_OP_NOT_IN
&& filter.m_op != FILTER_OP_IN) {
vals.push_back(filter.m_threshold);
Expand All @@ -1915,6 +1933,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, const Request& req) {
vals.push_back(scalar);
}
}

for (const auto& scalar : vals) {
auto* s = f->mutable_value()->Add();
switch (scalar.get_dtype()) {
Expand Down
2 changes: 2 additions & 0 deletions cpp/perspective/src/cpp/view_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
namespace perspective {

t_view_config::t_view_config(
t_vocab vocab,
const std::vector<std::string>& row_pivots,
const std::vector<std::string>& column_pivots,
const tsl::ordered_map<std::string, std::vector<std::string>>& aggregates,
Expand All @@ -29,6 +30,7 @@ t_view_config::t_view_config(
bool column_only
) :
m_init(false),
m_vocab(std::move(vocab)),
m_row_pivots(row_pivots),
m_column_pivots(column_pivots),
m_aggregates(aggregates),
Expand Down
5 changes: 5 additions & 0 deletions cpp/perspective/src/include/perspective/view_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class PERSPECTIVE_EXPORT t_view_config {
* @param sort
*/
t_view_config(
t_vocab vocab,
const std::vector<std::string>& row_pivots,
const std::vector<std::string>& column_pivots,
const tsl::ordered_map<std::string, std::vector<std::string>>&
Expand Down Expand Up @@ -178,8 +179,12 @@ class PERSPECTIVE_EXPORT t_view_config {
t_dtype dtype
);

// Global dictionary for `t_tscalar` in filter terms.
t_vocab m_vocab;

// containers for primitive data that does not need transformation into
// abstractions

std::vector<std::string> m_row_pivots;
std::vector<std::string> m_column_pivots;
tsl::ordered_map<std::string, std::vector<std::string>> m_aggregates;
Expand Down
8 changes: 7 additions & 1 deletion rust/bootstrap-runtime/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ use alloc::vec::Vec;

use zune_inflate::DeflateDecoder;

const HEAP_SIZE: usize = if cfg!(debug_assertions) {
512_000_000
} else {
64_000_000
};

#[allow(unused_unsafe)]
#[global_allocator]
static ALLOCATOR: talc::Talck<talc::locking::AssumeUnlockable, talc::ClaimOnOom> = {
static mut MEMORY: [u8; 64000000] = [0; 64000000];
static mut MEMORY: [u8; HEAP_SIZE] = [0; HEAP_SIZE];
let span = unsafe { talc::Span::from_const_array(core::ptr::addr_of!(MEMORY)) };
talc::Talc::new(unsafe { talc::ClaimOnOom::new(span) }).lock()
};
Expand Down
23 changes: 23 additions & 0 deletions rust/perspective-js/test/js/filters.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,29 @@ const datetime_data_local = [
table.delete();
});

test("y == 'abcdefghijklmnopqrstuvwxyz' (interned)", async function () {
const data = [
{ x: 1, y: "a", z: true },
{ x: 2, y: "b", z: false },
{ x: 3, y: "c", z: true },
{
x: 4,
y: "abcdefghijklmnopqrstuvwxyz",
z: false,
},
];

const table = await perspective.table(data);
const view = await table.view({
filter: [["y", "==", "abcdefghijklmnopqrstuvwxyz"]],
});

const json = await view.to_json();
expect(json).toEqual([data[3]]);
view.delete();
table.delete();
});

test("z == true", async function () {
var table = await perspective.table(data);
var view = await table.view({
Expand Down
44 changes: 44 additions & 0 deletions rust/perspective-js/test/js/view_config.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
// ┃ Copyright (c) 2017, the Perspective Authors. ┃
// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
// ┃ This file is part of the Perspective library, distributed under the terms ┃
// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

import { test, expect } from "@finos/perspective-test";
import perspective from "./perspective_client";

((perspective) => {
test.describe("View config", function () {
test("Non-interned filter strings do not create corrupted view configs", async function () {
const data = [
{ x: 1, y: "a", z: true },
{ x: 2, y: "b", z: false },
{ x: 3, y: "c", z: true },
{
x: 4,
y: "abcdefghijklmnopqrstuvwxyz",
z: false,
},
];

const table = await perspective.table(data);
const view = await table.view({
filter: [["y", "==", "abcdefghijklmnopqrstuvwxyz"]],
});

const config = await view.get_config();
expect(config.filter).toEqual([
["y", "==", "abcdefghijklmnopqrstuvwxyz"],
]);

view.delete();
table.delete();
});
});
})(perspective);
Loading