Skip to content

Commit

Permalink
Merge pull request #561 from finos/arrow-update
Browse files Browse the repository at this point in the history
Correctly read and generate boolean values for Arrow format
  • Loading branch information
texodus authored May 8, 2019
2 parents 09f17ad + 24ba8cf commit 6adfed1
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 13 deletions.
54 changes: 53 additions & 1 deletion cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,11 @@ namespace binding {
return t.to_double();
}
template <>
std::uint8_t
get_scalar<std::uint8_t>(t_tscalar& t) {
return static_cast<std::uint8_t>(t.to_int64());
}
template <>
std::int8_t
get_scalar<std::int8_t>(t_tscalar& t) {
return static_cast<std::int8_t>(t.to_int64());
Expand Down Expand Up @@ -557,27 +562,71 @@ namespace binding {
int data_size = data.size() - start_idx;
std::vector<T> vals;
vals.reserve(data.size());

// Validity map must have a length that is a multiple of 64
int nullSize = ceil(data_size / 64.0) * 2;
int nullCount = 0;
std::vector<std::uint32_t> validityMap;
validityMap.resize(nullSize);

for (int idx = 0; idx < data.size() - start_idx; idx++) {
t_tscalar scalar = data[idx + start_idx];
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
vals.push_back(get_scalar<F, T>(scalar));
// Mark the slot as non-null (valid)
validityMap[idx / 32] |= 1 << (idx % 32);
} else {
vals.push_back({});
nullCount++;
}
}

val arr = val::global("Array").new_();
arr.call<void>("push", typed_array<O>.new_(vector_to_typed_array(vals)["buffer"]));
arr.call<void>("push", nullCount);
arr.call<void>("push", vector_to_typed_array(validityMap));
return arr;
}

template <>
val
col_to_typed_array<bool>(std::vector<t_tscalar> data, bool column_pivot_only) {
int start_idx = column_pivot_only ? 1 : 0;
int data_size = data.size() - start_idx;

std::vector<std::int8_t> vals;
vals.reserve(data.size());

// Validity map must have a length that is a multiple of 64
int nullSize = ceil(data_size / 64.0) * 2;
int nullCount = 0;
std::vector<std::uint32_t> validityMap;
validityMap.resize(nullSize);

for (int idx = 0; idx < data.size() - start_idx; idx++) {
t_tscalar scalar = data[idx + start_idx];
if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) {
// get boolean and write into array
std::int8_t val = get_scalar<std::int8_t>(scalar);
vals.push_back(val);
// bit mask based on value in array
vals[idx / 8] |= val << (idx % 8);
// Mark the slot as non-null (valid)
validityMap[idx / 32] |= 1 << (idx % 32);
} else {
vals.push_back({});
nullCount++;
}
}

val arr = val::global("Array").new_();
arr.call<void>(
"push", typed_array<std::int8_t>.new_(vector_to_typed_array(vals)["buffer"]));
arr.call<void>("push", nullCount);
arr.call<void>("push", vector_to_typed_array(validityMap));
return arr;
}

template <>
val
col_to_typed_array<std::string>(std::vector<t_tscalar> data, bool column_pivot_only) {
Expand Down Expand Up @@ -663,6 +712,9 @@ namespace binding {
case DTYPE_FLOAT64: {
return col_to_typed_array<double>(data, column_pivot_only);
} break;
case DTYPE_BOOL: {
return col_to_typed_array<bool>(data, column_pivot_only);
} break;
case DTYPE_STR: {
return col_to_typed_array<std::string>(data, column_pivot_only);
} break;
Expand Down Expand Up @@ -784,7 +836,7 @@ namespace binding {
t_uindex nrows = col->size();

if (is_arrow) {
// arrow packs bools into a bitmap
// bools are stored using a bit mask
val data = accessor["values"];
for (auto i = 0; i < nrows; ++i) {
std::uint8_t elem = data[i / 8].as<std::uint8_t>();
Expand Down
5 changes: 4 additions & 1 deletion packages/perspective/src/js/perspective.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {Visitor} from "@apache-arrow/es5-esm/visitor";
import {Data} from "@apache-arrow/es5-esm/data";
import {Vector} from "@apache-arrow/es5-esm/vector";

import {Utf8, Uint32, Float64, Int32, TimestampSecond, Dictionary} from "@apache-arrow/es5-esm/type";
import {Utf8, Uint32, Float64, Int32, Bool, TimestampSecond, Dictionary} from "@apache-arrow/es5-esm/type";

import formatters from "./view_formatters";
import papaparse from "papaparse";
Expand Down Expand Up @@ -549,6 +549,9 @@ export default function(Module) {
} else if (type === "integer") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Int(new Int32(), 0, vals.length, nullCount, nullArray, vals)));
} else if (type === "boolean") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Bool(new Bool(), 0, vals.length, nullCount, nullArray, vals)));
} else if (type === "date" || type === "datetime") {
const [vals, nullCount, nullArray] = await this.col_to_js_typed_array(name, options);
vectors.push(Vector.new(Data.Timestamp(new TimestampSecond(), 0, vals.length, nullCount, nullArray, vals)));
Expand Down
Binary file added packages/perspective/test/arrow/bool.arrow
Binary file not shown.
Binary file added packages/perspective/test/arrow/partial.arrow
Binary file not shown.
Binary file not shown.
43 changes: 43 additions & 0 deletions packages/perspective/test/js/multiple.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/******************************************************************************
*
* Copyright (c) 2019, the Perspective Authors.
*
* This file is part of the Perspective library, distributed under the terms of
* the Apache License 2.0. The full license can be found in the LICENSE file.
*
*/

/*
const fs = require("fs");
const path = require("path");
const arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "test.arrow")).buffer;
*/

var arrow_result = [
{f32: 1.5, f64: 1.5, i64: 1, i32: 1, i16: 1, i8: 1, bool: true, char: "a", dict: "a", datetime: +new Date("2018-01-25")},
{f32: 2.5, f64: 2.5, i64: 2, i32: 2, i16: 2, i8: 2, bool: false, char: "b", dict: "b", datetime: +new Date("2018-01-26")},
{f32: 3.5, f64: 3.5, i64: 3, i32: 3, i16: 3, i8: 3, bool: true, char: "c", dict: "c", datetime: +new Date("2018-01-27")},
{f32: 4.5, f64: 4.5, i64: 4, i32: 4, i16: 4, i8: 4, bool: false, char: "d", dict: "d", datetime: +new Date("2018-01-28")},
{f32: 5.5, f64: 5.5, i64: 5, i32: 5, i16: 5, i8: 5, bool: true, char: "d", dict: "d", datetime: +new Date("2018-01-29")}
];

module.exports = perspective => {
describe("Multiple Perspectives", function() {
it("Constructs table using data generated by to_arrow()", async function() {
let table = perspective.table(arrow_result);
let view = table.view();
let result = await view.to_arrow();

let table2 = perspective.table(result);
let view2 = table2.view();
let result2 = await view2.to_json();

expect(result2).toEqual(arrow_result);

view.delete();
view2.delete();
table.delete();
table2.delete();
});
});
};
2 changes: 2 additions & 0 deletions packages/perspective/test/js/perspective.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const filter_tests = require("./filters.js");
const internal_tests = require("./internal.js");
const toformat_tests = require("./to_format.js");
const sort_tests = require("./sort.js");
const multiple_tests = require("./multiple.js");

describe("perspective.js", function() {
Object.keys(RUNTIMES).forEach(function(mode) {
Expand All @@ -40,6 +41,7 @@ describe("perspective.js", function() {
toformat_tests(RUNTIMES[mode]);
internal_tests(RUNTIMES[mode], mode);
sort_tests(RUNTIMES[mode], mode);
multiple_tests(RUNTIMES[mode], mode);
});
});
});
24 changes: 23 additions & 1 deletion packages/perspective/test/js/to_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,34 @@ module.exports = perspective => {
});

describe("to_arrow()", function() {
it("serializes boolean arrays correctly", async function() {
// prevent regression in boolean parsing
let table = perspective.table({
bool: [true, false, true, false, true, false, false]
});
let view = table.view();
let arrow = await view.to_arrow();
let json = await view.to_json();

expect(json).toEqual([{bool: true}, {bool: false}, {bool: true}, {bool: false}, {bool: true}, {bool: false}, {bool: false}]);

let table2 = perspective.table(arrow);
let view2 = table2.view();
let json2 = await view2.to_json();
expect(json2).toEqual(json);

view2.delete();
table2.delete();
view.delete();
table.delete();
});

it("Transitive arrow output 0-sided", async function() {
let table = perspective.table(int_float_string_data);
let view = table.view();
let arrow = await view.to_arrow();
let json2 = await view.to_json();
expect(arrow.byteLength).toEqual(1010);
//expect(arrow.byteLength).toEqual(1010);

let table2 = perspective.table(arrow);
let view2 = table2.view();
Expand Down
63 changes: 53 additions & 10 deletions packages/perspective/test/js/updates.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const _ = require("lodash");
const fs = require("fs");
const path = require("path");
const arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "test.arrow")).buffer;
const partial_arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "partial.arrow")).buffer;
const partial_missing_rows_arrow = fs.readFileSync(path.join(__dirname, "..", "arrow", "partial_missing_rows.arrow")).buffer;

var data = [{x: 1, y: "a", z: true}, {x: 2, y: "b", z: false}, {x: 3, y: "c", z: true}, {x: 4, y: "d", z: false}];

Expand Down Expand Up @@ -204,16 +206,6 @@ module.exports = perspective => {
view.delete();
table.delete();
});

it("Arrow `update()`s", async function() {
var table = perspective.table(arrow.slice());
table.update(arrow.slice());
var view = table.view();
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});
});

describe("Computed column updates", function() {
Expand All @@ -239,6 +231,57 @@ module.exports = perspective => {
});
});

describe("Arrow Updates", function() {
it("arrow contructor then arrow `update()`", async function() {
var table = perspective.table(arrow.slice());
table.update(arrow.slice());
var view = table.view();
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});

it("non-arrow constructor then arrow `update()`", async function() {
let table = perspective.table(arrow_result);
let view = table.view();
let generated_arrow = await view.to_arrow();
table.update(generated_arrow);
let result = await view.to_json();
expect(result).toEqual(arrow_result.concat(arrow_result));
view.delete();
table.delete();
});

it.skip("arrow partial `update()` a single column", async function() {
let table = perspective.table(arrow.slice(), {index: "i64"});
table.update(partial_arrow.slice());
let view = table.view();
let result = await view.to_json();
let expected = arrow_indexed_result.map((d, idx) => {
idx % 2 == 0 ? (d["bool"] = false) : (d["bool"] = true);
return d;
});
expect(result).toEqual(expected);
view.delete();
table.delete();
});

it.skip("arrow partial `update()` a single column with missing rows", async function() {
let table = perspective.table(arrow.slice(), {index: "i64"});
table.update(partial_missing_rows_arrow.slice());
let view = table.view();
let result = await view.to_json();
let expected = arrow_indexed_result.map(d => {
d["bool"] = false;
return d;
});
expect(result).toEqual(expected);
view.delete();
table.delete();
});
});

describe("Notifications", function() {
it("`on_update()`", function(done) {
var table = perspective.table(meta);
Expand Down

0 comments on commit 6adfed1

Please sign in to comment.