Skip to content

Commit

Permalink
Add compression options to to_arrow(), update superstore examples.
Browse files Browse the repository at this point in the history
  • Loading branch information
texodus committed Aug 14, 2023
1 parent a0fdc7d commit 4c775aa
Show file tree
Hide file tree
Showing 44 changed files with 627 additions and 93 deletions.
9 changes: 7 additions & 2 deletions cmake/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_decoder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/options.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/parser.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/writer.cc

# IPC
Expand All @@ -157,6 +157,7 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/function_internal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernel.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/ordering.cc


# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/registry.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/aggregate_basic.cc
Expand Down Expand Up @@ -193,6 +194,7 @@ set(ARROW_SRCS
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/vector_nested.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/vector_replace.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/vector_selection.cc
${PSP_CPP_SRC}/src/cpp/vendor/arrow_compute_registry.cpp

# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/vector_sort.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compute/kernels/row_encoder.cc
Expand All @@ -213,11 +215,13 @@ if(PSP_PYTHON_BUILD)
${ARROW_SRCS}
# use standard reader in Python builds.
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/coo_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csf_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csx_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/union_util.cc

# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/time.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc
Expand All @@ -234,7 +238,8 @@ else()
set(ARROW_SRCS
${ARROW_SRCS}
# Use our vendored reader that does not use threads.
${PSP_CPP_SRC}/src/cpp/vendor/single_threaded_reader.cpp)
${PSP_CPP_SRC}/src/cpp/vendor/single_threaded_reader.cpp
${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp)
endif()

set_property(SOURCE util/io_util.cc
Expand Down
3 changes: 1 addition & 2 deletions cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()

set(SOURCE_FILES
${PSP_CPP_SRC}/src/cpp/vendor/arrow_compute_registry.cpp
${PSP_CPP_SRC}/src/cpp/aggregate.cpp
${PSP_CPP_SRC}/src/cpp/aggspec.cpp
${PSP_CPP_SRC}/src/cpp/arg_sort.cpp
Expand Down Expand Up @@ -510,7 +509,7 @@ set(PYTHON_SOURCE_FILES ${SOURCE_FILES}
)

set(WASM_SOURCE_FILES ${SOURCE_FILES}
${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp
# ${PSP_CPP_SRC}/src/cpp/vendor/arrow_single_threaded_reader.cpp
)

set(PYTHON_BINDING_SOURCE_FILES
Expand Down
6 changes: 5 additions & 1 deletion cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,11 @@ namespace apachearrow {
auto read_options = arrow::csv::ReadOptions::Defaults();
auto parse_options = arrow::csv::ParseOptions::Defaults();
auto convert_options = arrow::csv::ConvertOptions::Defaults();

// #ifdef PSP_PARALLEL_FOR
// read_options.use_threads = true;
// #else
// read_options.use_threads = false;
// #endif
read_options.use_threads = false;
parse_options.newlines_in_values = true;

Expand Down
9 changes: 5 additions & 4 deletions cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,10 @@ namespace binding {
template <typename CTX_T>
t_val
to_arrow(std::shared_ptr<View<CTX_T>> view, std::int32_t start_row,
std::int32_t end_row, std::int32_t start_col, std::int32_t end_col) {
std::shared_ptr<std::string> s
= view->to_arrow(start_row, end_row, start_col, end_col, true);
std::int32_t end_row, std::int32_t start_col, std::int32_t end_col,
bool compress) {
std::shared_ptr<std::string> s = view->to_arrow(
start_row, end_row, start_col, end_col, true, compress);
return str_to_arraybuffer(s)["buffer"];
}

Expand All @@ -226,7 +227,7 @@ namespace binding {
t_val
get_row_delta(std::shared_ptr<View<CTX_T>> view) {
auto slice = view->get_row_delta();
auto row_delta = view->data_slice_to_arrow(slice, false);
auto row_delta = view->data_slice_to_arrow(slice, false, false);
return str_to_arraybuffer(row_delta)["buffer"];
}

Expand Down
19 changes: 16 additions & 3 deletions cpp/perspective/src/cpp/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,11 @@ View<t_ctx2>::get_data(t_uindex start_row, t_uindex end_row, t_uindex start_col,
template <typename CTX_T>
std::shared_ptr<std::string>
View<CTX_T>::to_arrow(std::int32_t start_row, std::int32_t end_row,
std::int32_t start_col, std::int32_t end_col, bool emit_group_by) const {
std::int32_t start_col, std::int32_t end_col, bool emit_group_by,
bool compress) const {
std::shared_ptr<t_data_slice<CTX_T>> data_slice
= get_data(start_row, end_row, start_col, end_col);
return data_slice_to_arrow(data_slice, emit_group_by);
return data_slice_to_arrow(data_slice, emit_group_by, compress);
};

template <>
Expand Down Expand Up @@ -1068,7 +1069,8 @@ View<CTX_T>::data_slice_to_batches(
template <typename CTX_T>
std::shared_ptr<std::string>
View<CTX_T>::data_slice_to_arrow(
std::shared_ptr<t_data_slice<CTX_T>> data_slice, bool emit_group_by) const {
std::shared_ptr<t_data_slice<CTX_T>> data_slice, bool emit_group_by,
bool compress) const {
std::pair<std::shared_ptr<arrow::Schema>,
std::shared_ptr<arrow::RecordBatch>>
pairs = data_slice_to_batches(emit_group_by, data_slice);
Expand All @@ -1087,6 +1089,17 @@ View<CTX_T>::data_slice_to_arrow(
buffer = *allocated;
arrow::io::BufferOutputStream sink(buffer);
auto options = arrow::ipc::IpcWriteOptions::Defaults();
if (compress) {
auto codec = arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME);
options.codec = std::move(codec).ValueUnsafe();
}

// #ifdef PSP_PARALLEL_FOR
// options.use_threads = false;
// #else
// options.use_threads = false;
// #endif
options.use_threads = false;
auto res = arrow::ipc::MakeStreamWriter(&sink, arrow_schema, options);
std::shared_ptr<arrow::ipc::RecordBatchWriter> writer = *res;
PSP_CHECK_ARROW_STATUS(writer->WriteRecordBatch(*batches));
Expand Down
6 changes: 3 additions & 3 deletions cpp/perspective/src/include/perspective/view.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class PERSPECTIVE_EXPORT View {
*/
std::shared_ptr<std::string> to_arrow(std::int32_t start_row,
std::int32_t end_row, std::int32_t start_col, std::int32_t end_col,
bool emit_group_by) const;
bool emit_group_by, bool compress) const;

/**
* @brief Serializes the `View`'s data into the Apache Arrow format
Expand Down Expand Up @@ -211,8 +211,8 @@ class PERSPECTIVE_EXPORT View {
* @return std::shared_ptr<std::string>
*/
std::shared_ptr<std::string> data_slice_to_arrow(
std::shared_ptr<t_data_slice<CTX_T>> data_slice,
bool emit_group_by) const;
std::shared_ptr<t_data_slice<CTX_T>> data_slice, bool emit_group_b,
bool compress) const;

/**
* @brief Serializes a given data slice into the Apache Arrow format. Can
Expand Down
2 changes: 1 addition & 1 deletion docs/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ function template(is_dark) {
}
window.addEventListener('DOMContentLoaded', function () {
var xhr = new XMLHttpRequest();
xhr.open('GET', '/node_modules/superstore-arrow/superstore.arrow', true);
xhr.open('GET', '/node_modules/superstore-arrow/superstore.lz4.arrow', true);
xhr.responseType = "arraybuffer"
xhr.onload = on_load.bind(xhr);
xhr.send(null);
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/js.md
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ const fs = require("fs");
const host = new WebSocketServer({ assets: [__dirname], port: 8080 });

// Read an arrow file from the file system and host it as a named table.
const arr = fs.readFileSync(__dirname + "/superstore.arrow");
const arr = fs.readFileSync(__dirname + "/superstore.lz4.arrow");
table(arr).then((table) => {
host.host_table("table_one", table);
});
Expand Down
2 changes: 1 addition & 1 deletion docs/src/data/superstore.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

import perspective from "@finos/perspective";
import SUPERSTORE_URL from "superstore-arrow/superstore.arrow";
import SUPERSTORE_URL from "superstore-arrow/superstore.lz4.arrow";

export const SUPERSTORE_TABLE = (async function () {
const worker = perspective.shared_worker();
Expand Down
2 changes: 1 addition & 1 deletion examples/blocks/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-viewer-openlayers": "^2.4.0",
"@finos/perspective-workspace": "^2.4.0",
"superstore-arrow": "1.0.0"
"superstore-arrow": "3.0.0"
}
}
11 changes: 5 additions & 6 deletions examples/blocks/src/editable/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,minimum-scale=1,user-scalable=no"/>
<meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1,minimum-scale=1,user-scalable=no" />

<script type="module" src="/node_modules/@finos/perspective/dist/cdn/perspective.js"></script>
<script type="module" src="/node_modules/@finos/perspective-viewer/dist/cdn/perspective-viewer.js"></script>
Expand All @@ -21,26 +21,25 @@

<link rel="preload" href="/node_modules/@finos/perspective/dist/cdn/perspective.cpp.wasm" as="fetch" type="application/wasm" crossorigin="anonymous" />
<link rel="preload" href="/node_modules/@finos/perspective-viewer/dist/cdn/perspective_bg.wasm" as="fetch" type="application/wasm" crossorigin="anonymous" />
<link rel="preload" href="/node_modules/superstore-arrow/superstore.arrow" as="fetch" type="arraybuffer" crossorigin="anonymous" />
<link rel="preload" href="/node_modules/superstore-arrow/superstore.lz4.arrow" as="fetch" type="arraybuffer" crossorigin="anonymous" />
<link rel="preload" href="/node_modules/@finos/perspective/dist/cdn/perspective.worker.js" as="fetch" type="application/javascript" crossorigin="anonymous" />

<script type="module">
import {worker} from "/node_modules/@finos/perspective/dist/cdn/perspective.js";
import { worker } from "/node_modules/@finos/perspective/dist/cdn/perspective.js";

const WORKER = worker();
const REQ = fetch("/node_modules/superstore-arrow/superstore.arrow");
const REQ = fetch("/node_modules/superstore-arrow/superstore.lz4.arrow");

async function load() {
const resp = await REQ;
const arrow = await resp.arrayBuffer();
const el = document.getElementsByTagName("perspective-viewer")[0];
const table = WORKER.table(arrow);
el.load(table);
el.restore({settings: true, plugin_config: {editable: true}})
el.restore({ settings: true, plugin_config: { editable: true } });
}

load();

</script>

<style>
Expand Down
2 changes: 1 addition & 1 deletion examples/blocks/src/superstore/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

<script type="module">
import perspective from "/node_modules/@finos/perspective/dist/cdn/perspective.js";
const DATA_URL = "/node_modules/superstore-arrow/superstore.arrow";
const DATA_URL = "/node_modules/superstore-arrow/superstore.lz4.arrow";

async function get_layout() {
const req = await fetch("layout.json");
Expand Down
2 changes: 1 addition & 1 deletion examples/esbuild-example/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"@finos/perspective-viewer-d3fc": "^2.4.0",
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-viewer-openlayers": "^2.4.0",
"superstore-arrow": "^1.0.0"
"superstore-arrow": "^3.0.0"
},
"devDependencies": {
"@finos/perspective-esbuild-plugin": "^2.4.0",
Expand Down
18 changes: 9 additions & 9 deletions examples/esbuild-example/src/index.html
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
<!doctype html>
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />

<title>Perspective `esbuild` Example</title>

<link rel="preload" href="perspective.cpp.wasm" as="fetch" type="application/wasm" crossorigin="anonymous">
<link rel="preload" href="perspective_viewer_bg.wasm" as="fetch" type="application/wasm" crossorigin="anonymous">
<link rel="preload" href="superstore.arrow" as="fetch" type="arraybuffer" crossorigin="anonymous">
<link rel="preload" href="perspective.worker.js" as="fetch" type="application/javascript" crossorigin="anonymous">
<link rel="preload" href="editor.worker.js" as="fetch" type="application/javascript" crossorigin="anonymous">
<link rel="preload" href="perspective.cpp.wasm" as="fetch" type="application/wasm" crossorigin="anonymous" />
<link rel="preload" href="perspective_viewer_bg.wasm" as="fetch" type="application/wasm" crossorigin="anonymous" />
<link rel="preload" href="superstore.lz4.arrow" as="fetch" type="arraybuffer" crossorigin="anonymous" />
<link rel="preload" href="perspective.worker.js" as="fetch" type="application/javascript" crossorigin="anonymous" />
<link rel="preload" href="editor.worker.js" as="fetch" type="application/javascript" crossorigin="anonymous" />

<script type="module" src="index.js"></script>
<link rel="stylesheet" href="index.css" />
</head>
<body></body>
</html>
</html>
2 changes: 1 addition & 1 deletion examples/esbuild-example/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import "@finos/perspective-viewer/dist/css/pro-dark.css";

import "./index.css";

import arrow from "superstore-arrow/superstore.arrow";
import arrow from "superstore-arrow/superstore.lz4.arrow";
const req = fetch(arrow);
const worker = perspective.shared_worker();

Expand Down
2 changes: 1 addition & 1 deletion examples/python-aiohttp/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@finos/perspective-viewer-d3fc": "^2.4.0",
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-workspace": "^2.4.0",
"superstore-arrow": "^1.0.0"
"superstore-arrow": "^3.0.0"
},
"devDependencies": {
"@finos/perspective-webpack-plugin": "^2.4.0",
Expand Down
2 changes: 1 addition & 1 deletion examples/python-aiohttp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(
here, "..", "..", "node_modules", "superstore-arrow", "superstore.arrow"
here, "..", "..", "node_modules", "superstore-arrow", "superstore.lz4.arrow"
)


Expand Down
2 changes: 1 addition & 1 deletion examples/python-starlette/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@finos/perspective-viewer-d3fc": "^2.4.0",
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-workspace": "^2.4.0",
"superstore-arrow": "^1.0.0"
"superstore-arrow": "^3.0.0"
},
"devDependencies": {
"@finos/perspective-webpack-plugin": "^2.4.0",
Expand Down
2 changes: 1 addition & 1 deletion examples/python-starlette/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(
here, "..", "..", "node_modules", "superstore-arrow", "superstore.arrow"
here, "..", "..", "node_modules", "superstore-arrow", "superstore.lz4.arrow"
)


Expand Down
2 changes: 1 addition & 1 deletion examples/python-tornado-streaming/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@finos/perspective-viewer-d3fc": "^2.4.0",
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-workspace": "^2.4.0",
"superstore-arrow": "^1.0.0"
"superstore-arrow": "^3.0.0"
},
"devDependencies": {
"@finos/perspective-webpack-plugin": "^2.4.0",
Expand Down
2 changes: 1 addition & 1 deletion examples/python-tornado/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@finos/perspective-viewer-d3fc": "^2.4.0",
"@finos/perspective-viewer-datagrid": "^2.4.0",
"@finos/perspective-workspace": "^2.4.0",
"superstore-arrow": "^1.0.0"
"superstore-arrow": "^3.0.0"
},
"devDependencies": {
"@finos/perspective-webpack-plugin": "^2.4.0",
Expand Down
2 changes: 1 addition & 1 deletion examples/python-tornado/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

here = os.path.abspath(os.path.dirname(__file__))
file_path = os.path.join(
here, "..", "..", "node_modules", "superstore-arrow", "superstore.arrow"
here, "..", "..", "node_modules", "superstore-arrow", "superstore.lz4.arrow"
)

IS_MULTI_THREADED = True
Expand Down
2 changes: 1 addition & 1 deletion examples/react-example/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import "./index.css";
const worker = perspective.default.shared_worker();

const getTable = async (): Promise<perspective.Table> => {
const req = fetch("./superstore.arrow");
const req = fetch("./superstore.lz4.arrow");
const resp = await req;
const buffer = await resp.arrayBuffer();
return await worker.table(buffer as any);
Expand Down
2 changes: 1 addition & 1 deletion examples/webpack-example/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import "@finos/perspective-viewer/dist/css/themes.css";

import "./index.css";

import superstore from "superstore-arrow/superstore.arrow";
import superstore from "superstore-arrow/superstore.lz4.arrow";

const worker = perspective.shared_worker();

Expand Down
2 changes: 1 addition & 1 deletion examples/workspace-editing-python/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"..",
"node_modules",
"superstore-arrow",
"superstore.arrow",
"superstore.lz4.arrow",
)


Expand Down
2 changes: 1 addition & 1 deletion examples/workspace/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import "@finos/perspective-viewer-d3fc";
import "./index.less";

const datasource = async () => {
const req = fetch("./superstore.arrow");
const req = fetch("./superstore.lz4.arrow");
const resp = await req;
const buffer = await resp.arrayBuffer();
const worker = perspective.shared_worker();
Expand Down
Loading

0 comments on commit 4c775aa

Please sign in to comment.