Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP]: update to apache-arrow@0.3.0 #55

Merged
merged 1 commit into from
Mar 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/perspective/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"author": "",
"license": "Apache",
"dependencies": {
"@apache-arrow/es5-esm": "^0.2.0",
"@apache-arrow/es5-esm": "^0.3.0",
"@jpmorganchase/perspective-common": "^0.1.0",
"babel-runtime": "^6.26.0",
"bluebird": "^3.5.1",
Expand Down
41 changes: 17 additions & 24 deletions packages/perspective/src/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,16 @@ namespace arrow {
{

// Copy out dictionary encoded data
val values = dcol["data"]["values"];
val vdata = values["data"];
val dictionary = dcol["dictionary"];
// ptaylor: This assumes the dictionary is either a Binary or Utf8 Vector. Should it support other Vector types?
val vdata = dictionary["values"];
t_int32 vsize = vdata["length"].as<t_int32>();
std::vector<t_uchar> data;
data.reserve(vsize);
data.resize(vsize);
vecFromTypedArray(vdata, data.data(), vsize);

val voffsets = values["offsets"];
val voffsets = dictionary["valueOffsets"];
t_int32 osize = voffsets["length"].as<t_int32>();
std::vector<t_int32> offsets;
offsets.reserve(osize);
Expand All @@ -197,7 +198,7 @@ namespace arrow {
t_vocab* vocab = col->_get_vocab();
t_str elem;

t_int32 dsize = dcol["data"]["length"].as<t_int32>();
t_int32 dsize = dictionary["length"].as<t_int32>();
for (t_int32 i = 0; i < dsize; ++i) {
t_int32 bidx = offsets[i];
std::size_t es = offsets[i+1] - bidx;
Expand All @@ -219,7 +220,7 @@ _fill_col(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
arrow::vecFromTypedArray(data, col->get_nth<T>(0), nrows);
} else {
for (auto i = 0; i < nrows; ++i)
Expand All @@ -237,7 +238,7 @@ _fill_col<t_int64>(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
// Arrow packs each 64-bit value into two 32-bit ints
arrow::vecFromTypedArray(data, col->get_nth<t_int64>(0), nrows * 2);
} else {
Expand All @@ -252,17 +253,17 @@ _fill_col<t_time>(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
// Arrow packs each 64-bit value into two 32-bit ints
arrow::vecFromTypedArray(data, col->get_nth<t_time>(0), nrows*2);

t_str unit = dcol["unit"].as<t_str>();
if (unit != "MILLISECOND") {
t_int8 unit = dcol["type"]["unit"].as<t_int8>();
if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) {
// Slow path - need to convert each value
t_int64 factor = 1;
if (unit == "NANOSECOND") {
if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) {
factor = 1e6;
} else if (unit == "MICROSECOND") {
} else if (unit == /* Arrow.enum_.TimeUnit.MICROSECOND */ 2) {
factor = 1e3;
}
for (auto i = 0; i < nrows; ++i)
Expand All @@ -287,7 +288,7 @@ _fill_col<t_bool>(val dcol, t_col_sptr col, t_bool is_arrow)

if (is_arrow) {
// arrow packs bools into a bitmap
val data = dcol["data"];
val data = dcol["values"];
for (auto i = 0; i < nrows; ++i)
{
t_uint8 elem = data[i / 8].as<t_uint8>();
Expand All @@ -313,7 +314,7 @@ _fill_col<std::string>(val dcol, t_col_sptr col, t_bool is_arrow)

if (is_arrow) {
if (dcol["constructor"]["name"].as<t_str>() == "DictionaryVector") {
val vkeys = dcol["keys"]["data"];
val vkeys = dcol["indicies"]["values"];

// Perspective stores string indices in a 32-bit unsigned array
// JavaScript's typed arrays handle copying from arrays of various bit-widths properly
Expand All @@ -333,18 +334,15 @@ _fill_col<std::string>(val dcol, t_col_sptr col, t_bool is_arrow)
}
} else if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector" ||
dcol["constructor"]["name"].as<t_str>() == "BinaryVector") {
if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector") {
dcol = dcol["values"];
}

val vdata = dcol["data"];
val vdata = dcol["values"];
t_int32 vsize = vdata["length"].as<t_int32>();
std::vector<t_uint8> data;
data.reserve(vsize);
data.resize(vsize);
arrow::vecFromTypedArray(vdata, data.data(), vsize);

val voffsets = dcol["offsets"];
val voffsets = dcol["valueOffsets"];
t_int32 osize = voffsets["length"].as<t_int32>();
std::vector<t_int32> offsets;
offsets.reserve(osize);
Expand Down Expand Up @@ -460,12 +458,7 @@ _fill_data(t_table_sptr tbl,
if (null_count == 0) {
col->valid_raw_fill(true);
} else {
val validity = dcol;
if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector") {
validity = dcol["values"]["validity"]["data"];
} else {
validity = dcol["validity"]["data"];
}
val validity = dcol["nullBitmap"];
arrow::fill_col_valid(validity, col);
}
}
Expand Down
Loading