From ee6c0a69520ef44c86bcd8309f2896d7c4e94a31 Mon Sep 17 00:00:00 2001 From: Dave Hirschfeld Date: Mon, 5 Jun 2023 13:27:18 +1000 Subject: [PATCH 01/16] Allow `pandas~=2.0.0` to be installed --- python/perspective/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/perspective/setup.py b/python/perspective/setup.py index f2dbd64439..d5ebe312eb 100644 --- a/python/perspective/setup.py +++ b/python/perspective/setup.py @@ -54,7 +54,7 @@ "ipywidgets>=7.5.1,<9", "future>=0.16.0,<1", "numpy>=1.21.6,<2", - "pandas>=0.22.0,<2", + "pandas>=0.22.0,<2.1", "python-dateutil>=2.8.0,<3", "traitlets>=4.3.2,<6", ] From 323d7d3b096e02380af57d8bd31468fbdc1d1cad Mon Sep 17 00:00:00 2001 From: Tim Paine <3105306+timkpaine@users.noreply.github.com> Date: Thu, 8 Jun 2023 13:38:18 -0400 Subject: [PATCH 02/16] fix test for pandas 2 deviation make pandas bound more liberal --- python/perspective/perspective/tests/table/test_table_pandas.py | 2 ++ python/perspective/setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/perspective/perspective/tests/table/test_table_pandas.py b/python/perspective/perspective/tests/table/test_table_pandas.py index aba6ca511a..3b6b75a4f3 100644 --- a/python/perspective/perspective/tests/table/test_table_pandas.py +++ b/python/perspective/perspective/tests/table/test_table_pandas.py @@ -776,6 +776,8 @@ def test_table_read_nan_datetime_col(self): def test_table_read_nat_datetime_col(self): data = pd.DataFrame({"str": ["abc", "def"], "datetime": ["NaT", datetime(2019, 7, 11, 11, 0)]}) + # datetime col is `datetime` in pandas<2, `object` in pandas>=2, so convert + data.datetime = pd.to_datetime(data.datetime) tbl = Table(data) assert tbl.schema() == { "index": int, diff --git a/python/perspective/setup.py b/python/perspective/setup.py index d5ebe312eb..41f525f5c2 100644 --- a/python/perspective/setup.py +++ b/python/perspective/setup.py @@ -54,7 +54,7 @@ "ipywidgets>=7.5.1,<9", "future>=0.16.0,<1", "numpy>=1.21.6,<2", - "pandas>=0.22.0,<2.1", + "pandas>=0.22.0,<3", "python-dateutil>=2.8.0,<3", "traitlets>=4.3.2,<6", ] From 457d65596c9b65f29b38557e3d571b8d869af819 Mon Sep 17 00:00:00 2001 From: Tom Jakubowski Date: Mon, 12 Jun 2023 19:26:55 -0700 Subject: [PATCH 03/16] Validate installed emsdk version --- scripts/run_emsdk.js | 9 +++++++-- scripts/with_emsdk.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100755 scripts/with_emsdk.sh diff --git a/scripts/run_emsdk.js b/scripts/run_emsdk.js index 9075cd4366..23356e9b53 100644 --- a/scripts/run_emsdk.js +++ b/scripts/run_emsdk.js @@ -13,8 +13,13 @@ const path = require("path"); try { const cwd = process.cwd(); const cmd = process.argv.slice(2).join(" "); - const emsdkdir = path.join(__dirname, "..", ".emsdk"); - execute_throw`cd ${emsdkdir} && . ./emsdk_env.sh >/dev/null 2>&1 && cd ${cwd} && ${cmd}`; + const scripts = __dirname; + const emsdkdir = path.join(scripts, "..", ".emsdk"); + const emversion = require("../package.json").emscripten; + if (!emversion) { + throw new Error("emscripten version not specified in package.json"); + } + execute_throw`${scripts}/with_emsdk.sh ${emsdkdir} ${emversion} ${cmd}`; } catch (e) { console.log(e.message); process.exit(1); diff --git a/scripts/with_emsdk.sh b/scripts/with_emsdk.sh new file mode 100755 index 0000000000..86ff40546b --- /dev/null +++ b/scripts/with_emsdk.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# usage: with_emsdk.sh path/to/emsdk [cmd...] + +emsdk_path=$1 +expected_version=$2 +cmd=("${@:3}") + +spew_equals_version() { + local spew=$1 + local expected=$2 + local extracted + extracted=$(echo "$spew" | grep -Eo '\d+\.\d+\.\d+') + [[ "$expected" == "$extracted" ]] +} + +selftest() { + local spew="emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) 3.1.14 (4343cbec72b7db283ea3bda1adc6cb1811ae9a73)" + spew_equals_version "$spew" "3.1.14" || { echo "test failed $LINENO"; exit 1; } +} + +if [[ "$1" == "--selftest" ]] +then + selftest + exit +fi + +source "$emsdk_path"/emsdk_env.sh >/dev/null 2>&1 +version_spew=$(emcc --version 2>&1 | head -n 1) +if spew_equals_version "$version_spew" "$expected_version" +then + "${cmd[@]}" +else + echo "expected emsdk version: $expected_version" + echo "actual emsdk version: $version_spew" + echo "To fix:" + echo " rm -rf .emsdk" + echo " node scripts/install_emsdk.js" + exit 1 +fi From d86d8d373265664585c48af7a5a7e590e5c27b0d Mon Sep 17 00:00:00 2001 From: Tom Jakubowski Date: Tue, 13 Jun 2023 13:58:57 -0700 Subject: [PATCH 04/16] Activate emsdk version from package.json This ensures that the version installed on a developer's clone matches what's in package.json may want to catch the error + add some instructions to fix the problem --- scripts/run_emsdk.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/run_emsdk.js b/scripts/run_emsdk.js index 23356e9b53..909a788c20 100644 --- a/scripts/run_emsdk.js +++ b/scripts/run_emsdk.js @@ -13,13 +13,16 @@ const path = require("path"); try { const cwd = process.cwd(); const cmd = process.argv.slice(2).join(" "); - const scripts = __dirname; - const emsdkdir = path.join(scripts, "..", ".emsdk"); - const emversion = require("../package.json").emscripten; + const emsdkdir = path.join(__dirname, "..", ".emsdk"); + const emversion = require(path.join( + __dirname, + "..", + "package.json" + )).emscripten; if (!emversion) { - throw new Error("emscripten version not specified in package.json"); + throw new Error("Emscripten version not specified in package.json"); } - execute_throw`${scripts}/with_emsdk.sh ${emsdkdir} ${emversion} ${cmd}`; + execute_throw`cd ${emsdkdir} && . ./emsdk_env.sh >/dev/null 2>&1 && emsdk activate ${emversion} >/dev/null && cd ${cwd} && ${cmd}`; } catch (e) { console.log(e.message); process.exit(1); From 2837e2a0fa1422b2d86945209a053a16b7766969 Mon Sep 17 00:00:00 2001 From: Tom Jakubowski Date: Tue, 13 Jun 2023 14:10:27 -0700 Subject: [PATCH 05/16] remove helper script --- scripts/with_emsdk.sh | 40 ---------------------------------------- 1 file changed, 40 deletions(-) delete mode 100755 scripts/with_emsdk.sh diff --git a/scripts/with_emsdk.sh b/scripts/with_emsdk.sh deleted file mode 100755 index 86ff40546b..0000000000 --- a/scripts/with_emsdk.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# usage: with_emsdk.sh path/to/emsdk [cmd...] - -emsdk_path=$1 -expected_version=$2 -cmd=("${@:3}") - -spew_equals_version() { - local spew=$1 - local expected=$2 - local extracted - extracted=$(echo "$spew" | grep -Eo '\d+\.\d+\.\d+') - [[ "$expected" == "$extracted" ]] -} - -selftest() { - local spew="emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) 3.1.14 (4343cbec72b7db283ea3bda1adc6cb1811ae9a73)" - spew_equals_version "$spew" "3.1.14" || { echo "test failed $LINENO"; exit 1; } -} - -if [[ "$1" == "--selftest" ]] -then - selftest - exit -fi - -source "$emsdk_path"/emsdk_env.sh >/dev/null 2>&1 -version_spew=$(emcc --version 2>&1 | head -n 1) -if spew_equals_version "$version_spew" "$expected_version" -then - "${cmd[@]}" -else - echo "expected emsdk version: $expected_version" - echo "actual emsdk version: $version_spew" - echo "To fix:" - echo " rm -rf .emsdk" - echo " node scripts/install_emsdk.js" - exit 1 -fi From f68a9cf1cd3c7fb5cdccc0f419db8a4529d8990c Mon Sep 17 00:00:00 2001 From: Tom Jakubowski Date: Tue, 13 Jun 2023 16:18:08 -0700 Subject: [PATCH 06/16] Always install current version in install_emsdk.js The build error you get when emsdk has drifted out of date looks like: error: error: tool is not installed and therefore cannot be activated: 'releases-adedc0750c4a89b65bee866edab24298cb8d6677-64bit' Command failed: cd /Users/tom/perspective/perspective/.emsdk && . ./emsdk_env.sh >/dev/null 2>&1 && emsdk activate 3.1.36 >/dev/null && cd /Users/tom/perspective/perspective/cpp/perspective && node ./build.js The resolution to this now is to run `node scripts/install_emsdk.js`. Previously it wouldn't install the current emsdk version unless the developer first rm-rfed .emsdk It's also still cheap to run the script when the current version is already installed. --- scripts/install_emsdk.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/install_emsdk.js b/scripts/install_emsdk.js index f961cd706d..5a55fa45e7 100644 --- a/scripts/install_emsdk.js +++ b/scripts/install_emsdk.js @@ -34,26 +34,25 @@ function emsdk(...args) { execute_throw`${emsdk} ${args.join(" ")}`; } -function upgrade() { - console.log(`-- Emscripten not found, installing ${emscripten}`); - emsdk_checkout(); +function toolchain_install() { + console.log(`-- Installing Emscripten ${emscripten}`); emsdk("install", emscripten); emsdk("activate", emscripten); console.log(`-- Emscripten ${emscripten} installed`); } -function check() { - try { - const emsdkdir = path.join(__dirname, "..", ".emsdk"); - execute_throw`cd ${emsdkdir} && . ${emsdkdir}/emsdk_env.sh && emcc --version`; - return true; - } catch (e) { - return fs.existsSync(path.join(__dirname, "..", ".emsdk")); - } +function repo_check() { + return fs.existsSync(path.join(base(), "emsdk_env.sh")); } if (!process.env.PSP_SKIP_EMSDK_INSTALL) { - if (!check()) { - upgrade(); + // if a stale toolchain is still activated in the shell, these vars break + // emsdk install in a confusing way. ensure they are unset + for (let ev of ["EMSDK", "EMSDK_NODE", "EMSDK_PYTHON", "SSL_CERT_FILE"]) { + delete process.env[ev]; + } + if (!repo_check()) { + emsdk_checkout(); } + toolchain_install(); } From e27b16937c78f6f843d93e8f2f2df164f74e2da3 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Thu, 15 Jun 2023 00:55:10 -0400 Subject: [PATCH 07/16] Fix `perspective-workspace`'s `save()` method when the settings panel is open. --- .../src/js/workspace/workspace.js | 33 ++- .../test/js/restore.spec.js | 205 ++++++++++++------ rust/perspective-viewer/Cargo.lock | 4 +- .../perspective-test/src/js/global_startup.ts | 11 +- .../src/js/global_teardown.ts | 2 +- 5 files changed, 168 insertions(+), 87 deletions(-) diff --git a/packages/perspective-workspace/src/js/workspace/workspace.js b/packages/perspective-workspace/src/js/workspace/workspace.js index fff38f8ca4..f65be38b58 100644 --- a/packages/perspective-workspace/src/js/workspace/workspace.js +++ b/packages/perspective-workspace/src/js/workspace/workspace.js @@ -169,14 +169,20 @@ export class PerspectiveWorkspace extends SplitPanel { } async save() { + const is_settings = this.dockpanel.mode === "single-document"; + let detail = is_settings + ? this._minimizedLayoutSlots + : PerspectiveDockPanel.mapWidgets( + (widget) => widget.viewer.getAttribute("slot"), + this.dockpanel.saveLayout() + ); + const layout = { sizes: [...this.relativeSizes()], - detail: PerspectiveDockPanel.mapWidgets( - (widget) => widget.viewer.getAttribute("slot"), - this.dockpanel.saveLayout() - ), + detail, mode: this.mode, }; + if (this.masterPanel.isAttached) { const master = { widgets: this.masterPanel.widgets.map((widget) => @@ -186,19 +192,24 @@ export class PerspectiveWorkspace extends SplitPanel { }; layout.master = master; } + const viewers = {}; for (const widget of this.masterPanel.widgets) { viewers[widget.viewer.getAttribute("slot")] = await widget.save(); } + const widgets = PerspectiveDockPanel.getWidgets( - this.dockpanel.saveLayout() + is_settings ? this._minimizedLayout : this.dockpanel.saveLayout() ); + await Promise.all( widgets.map(async (widget) => { - viewers[widget.viewer.getAttribute("slot")] = - await widget.save(); + const slot = widget.viewer.getAttribute("slot"); + viewers[slot] = await widget.save(); + viewers[slot].settings = false; }) ); + return { ...layout, viewers }; } @@ -244,6 +255,7 @@ export class PerspectiveWorkspace extends SplitPanel { callback.bind(this, false), detail ); + this.dockpanel.mode = "multiple-document"; this.dockpanel.restoreLayout(detailLayout); tasks = tasks.concat( PerspectiveDockPanel.getWidgets(detailLayout).map( @@ -494,6 +506,11 @@ export class PerspectiveWorkspace extends SplitPanel { _maximize(widget) { widget.viewer.classList.add("widget-maximize"); this._minimizedLayout = this.dockpanel.saveLayout(); + this._minimizedLayoutSlots = PerspectiveDockPanel.mapWidgets( + (widget) => widget.viewer.getAttribute("slot"), + this.dockpanel.saveLayout() + ); + this._maximizedWidget = widget; this.dockpanel.mode = "single-document"; this.dockpanel.activateWidget(widget); @@ -954,7 +971,7 @@ export class PerspectiveWorkspace extends SplitPanel { } const settings = (event) => { - if (!event.detail) { + if (!event.detail && this.dockpanel.mode === "single-document") { this._unmaximize(); } }; diff --git a/packages/perspective-workspace/test/js/restore.spec.js b/packages/perspective-workspace/test/js/restore.spec.js index f31c4ddf68..7131fe7629 100644 --- a/packages/perspective-workspace/test/js/restore.spec.js +++ b/packages/perspective-workspace/test/js/restore.spec.js @@ -7,7 +7,7 @@ * */ -import { test } from "@playwright/test"; +import { test, expect } from "@playwright/test"; import { compareLightDOMContents, compareShadowDOMContents, @@ -90,44 +90,142 @@ function tests(context, compare) { ); }); - // This test flaps constantly due to mis-ordered HTML attributes and I don't - // want to fix it for the value it provides. - // test.skip("restore workspace with viewers with generated slotids", async (page) => { - // const config = { - // viewers: { - // PERSPECTIVE_GENERATED_ID_0: { - // table: "superstore", - // name: "Test", - // group_by: ["State"], - // columns: ["Sales", "Profit"], - // }, - // }, - // detail: { - // main: { - // currentIndex: 0, - // type: "tab-area", - // widgets: ["PERSPECTIVE_GENERATED_ID_0"], - // }, - // }, - // }; - - // await page.evaluate(async (config) => { - // const workspace = document.getElementById("workspace"); - // await workspace.restore(config); - // }, config); - - // await page.evaluate(async () => { - // const workspace = document.getElementById("workspace").workspace; - // const widget = workspace.getAllWidgets()[0]; - // await workspace.duplicate(widget); - // }); - - // await page.evaluate(async () => { - // await workspace.flush(); - // }); - - // return extract(page); - // }); + test.skip("save workspace with settings panel open", async ({ page }) => { + const config = { + viewers: { + One: { + table: "superstore", + name: "One", + group_by: ["State"], + columns: ["Sales", "Profit"], + }, + Two: { table: "superstore", name: "Two" }, + }, + detail: { + main: { + type: "split-area", + orientation: "vertical", + children: [ + { + type: "tab-area", + widgets: ["One"], + currentIndex: 0, + }, + { + type: "tab-area", + widgets: ["Two"], + currentIndex: 0, + }, + ], + sizes: [0.5, 0.5], + }, + }, + }; + + await page.evaluate(async (config) => { + const workspace = document.getElementById("workspace"); + await workspace.restore(config); + }, config); + + const button = await page.locator(".p-TabBar-tabLabel"); + await button.first().click(); + const saved = await page.evaluate(async () => { + const workspace = document.getElementById("workspace"); + return await workspace.save(); + }); + + expect(saved).toEqual({ + sizes: [1], + detail: { + main: { + type: "split-area", + orientation: "vertical", + children: [ + { type: "tab-area", widgets: ["One"], currentIndex: 0 }, + { type: "tab-area", widgets: ["Two"], currentIndex: 0 }, + ], + sizes: [0.5, 0.5], + }, + }, + mode: "globalFilters", + viewers: { + One: { + plugin: "Datagrid", + plugin_config: { + columns: {}, + editable: false, + scroll_lock: false, + }, + settings: false, + theme: "Pro Light", + title: null, + group_by: ["State"], + split_by: [], + columns: ["Sales", "Profit"], + filter: [], + sort: [], + expressions: [], + aggregates: {}, + master: false, + table: "superstore", + linked: false, + }, + Two: { + plugin: "Datagrid", + plugin_config: { + columns: {}, + editable: false, + scroll_lock: false, + }, + settings: false, + theme: "Pro Light", + title: null, + group_by: [], + split_by: [], + columns: [ + "Row ID", + "Order ID", + "Order Date", + "Ship Date", + "Ship Mode", + "Customer ID", + "Customer Name", + "Segment", + "Country", + "City", + "State", + "Postal Code", + "Region", + "Product ID", + "Category", + "Sub-Category", + "Product Name", + "Sales", + "Quantity", + "Discount", + "Profit", + ], + filter: [], + sort: [], + expressions: [], + aggregates: {}, + master: false, + table: "superstore", + linked: false, + }, + }, + }); + + await page.evaluate(async (config) => { + const workspace = document.getElementById("workspace"); + await workspace.restore(config); + }, saved); + + return compare( + page, + `${context}-save-workspace-with-settings-panel-open.txt` + ); + }); } test.describe("Workspace restore", () => { @@ -139,32 +237,3 @@ test.describe("Workspace restore", () => { tests("shadow-dom", compareShadowDOMContents); }); }); - -// utils.with_server({ paths: PATHS }, () => { -// describe.page( -// "index.html", -// () => { -// describe("Light DOM", () => { -// tests((page) => -// page.evaluate( -// async () => -// document.getElementById("workspace").outerHTML -// ) -// ); -// }); - -// describe("Shadow DOM", () => { -// tests((page) => -// page.evaluate( -// async () => -// document -// .getElementById("workspace") -// .shadowRoot.querySelector("#container") -// .innerHTML -// ) -// ); -// }); -// }, -// { root: TEST_ROOT } -// ); -// }); diff --git a/rust/perspective-viewer/Cargo.lock b/rust/perspective-viewer/Cargo.lock index 233dbbd032..6712a66a81 100644 --- a/rust/perspective-viewer/Cargo.lock +++ b/rust/perspective-viewer/Cargo.lock @@ -908,7 +908,7 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "perspective" -version = "2.2.0" +version = "2.2.1" dependencies = [ "anyhow", "async-lock", @@ -941,7 +941,7 @@ dependencies = [ [[package]] name = "perspective-bundle" -version = "2.2.0" +version = "2.2.1" dependencies = [ "flate2", "wasm-bindgen-cli-support", diff --git a/tools/perspective-test/src/js/global_startup.ts b/tools/perspective-test/src/js/global_startup.ts index 8ce1e824ba..106a825412 100644 --- a/tools/perspective-test/src/js/global_startup.ts +++ b/tools/perspective-test/src/js/global_startup.ts @@ -13,13 +13,8 @@ import path from "path"; export default async function run() { const RESULTS_PATH = path.join(__dirname, "../../results.tar.gz"); - try { - if (fs.existsSync(RESULTS_PATH)) { - console.log("Using results.tar.gz"); - await tar.extract({ file: RESULTS_PATH, gzip: true }); - } - } catch (e) { - console.error("Failed to untar results archives"); - fs.unlinkSync(RESULTS_PATH); + if (fs.existsSync(RESULTS_PATH)) { + console.log("Using results.tar.gz"); + await tar.extract({ file: RESULTS_PATH, gzip: true }); } } diff --git a/tools/perspective-test/src/js/global_teardown.ts b/tools/perspective-test/src/js/global_teardown.ts index b50fc7f2a7..e3c0fd9536 100644 --- a/tools/perspective-test/src/js/global_teardown.ts +++ b/tools/perspective-test/src/js/global_teardown.ts @@ -36,7 +36,7 @@ export default async function run() { return !path.endsWith(".DS_Store"); }, }, - [path.join(__dirname, "../../dist/snapshots")], + [path.join("tools/perspective-test/dist/snapshots")], x ) ); From d639fdba37ee12b86be8e247d5575c614bae41ad Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Thu, 15 Jun 2023 00:55:45 -0400 Subject: [PATCH 08/16] Disable (optional) workspace overflow --- packages/perspective-workspace/src/less/dockpanel.less | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/perspective-workspace/src/less/dockpanel.less b/packages/perspective-workspace/src/less/dockpanel.less index 75a6ccd8fa..37e35e8f01 100644 --- a/packages/perspective-workspace/src/less/dockpanel.less +++ b/packages/perspective-workspace/src/less/dockpanel.less @@ -8,7 +8,7 @@ */ .p-DockPanel { - overflow: visible !important; + overflow: var(--dock-panel--overflow, hidden); position: absolute; background-color: var(--detail--background-color, transparent); padding: 3px; From bd3e2240e82932195508de56f837c449481fc011 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Mon, 19 Jun 2023 19:11:45 -0400 Subject: [PATCH 09/16] Parallelize arrow column reading and writing --- cpp/perspective/src/cpp/arrow_loader.cpp | 34 +- cpp/perspective/src/cpp/view.cpp | 498 ++++++++++++----------- 2 files changed, 279 insertions(+), 253 deletions(-) diff --git a/cpp/perspective/src/cpp/arrow_loader.cpp b/cpp/perspective/src/cpp/arrow_loader.cpp index f0365f436f..1604e17be2 100644 --- a/cpp/perspective/src/cpp/arrow_loader.cpp +++ b/cpp/perspective/src/cpp/arrow_loader.cpp @@ -161,31 +161,31 @@ namespace apachearrow { std::shared_ptr schema = m_table->schema(); std::vector> fields = schema->fields(); - for (long unsigned int cidx = 0; cidx < m_names.size(); ++cidx) { + parallel_for(int(m_names.size()), [&](int cidx) { auto name = m_names[cidx]; t_dtype type = m_types[cidx]; - if (!input_schema.has_column(name)) { + if (input_schema.has_column(name)) { // Skip columns that are defined in the arrow but not // in the Table's input schema. - continue; - } - auto raw_type = fields[cidx]->type()->name(); + auto raw_type = fields[cidx]->type()->name(); - if (name == "__INDEX__") { - implicit_index = true; - std::shared_ptr pkey_col_sptr - = tbl.add_column_sptr("psp_pkey", type, true); - fill_column(tbl, pkey_col_sptr, "psp_pkey", cidx, type, - raw_type, is_update); - tbl.clone_column("psp_pkey", "psp_okey"); - continue; - } else { - auto col = tbl.get_column(name); - fill_column(tbl, col, name, cidx, type, raw_type, is_update); + if (name == "__INDEX__") { + implicit_index = true; + std::shared_ptr pkey_col_sptr + = tbl.add_column_sptr("psp_pkey", type, true); + fill_column(tbl, pkey_col_sptr, "psp_pkey", cidx, type, + raw_type, is_update); + tbl.clone_column("psp_pkey", "psp_okey"); + // continue; + } else { + auto col = tbl.get_column(name); + fill_column( + tbl, col, name, cidx, type, raw_type, is_update); + } } - } + }); // Fill index column - recreated every time a `t_data_table` is created. if (!implicit_index) { diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index e0221f7e05..41bc4bf0f2 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -594,17 +594,18 @@ View::data_slice_to_batches( std::vector> vectors; std::vector> fields; - std::int32_t num_columns = end_col - start_col; - std::vector row_pivots = m_view_config->get_row_pivots(); t_uindex num_row_paths = emit_group_by ? row_pivots.size() : 0; if (num_columns + num_row_paths > 0) { - fields.reserve(num_columns + num_row_paths); - vectors.reserve(num_columns + num_row_paths); + fields.resize(num_columns + num_row_paths); + vectors.resize(num_columns + num_row_paths); } - if (emit_group_by && num_row_paths > 0 && !is_column_only()) { + auto num_output_row_paths = is_column_only() ? 0 : num_row_paths; + + t_uindex write_idx = 0; + if (emit_group_by && num_output_row_paths > 0) { auto schema = m_table->get_schema(); for (auto rpidx = 0; rpidx < num_row_paths; ++rpidx) { std::string column_name = row_pivots.at(rpidx); @@ -631,149 +632,159 @@ View::data_slice_to_batches( std::shared_ptr arr; switch (dtype) { case DTYPE_INT8: { - fields.push_back( - arrow::field(row_path_name, arrow::int8())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::int8()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_UINT8: { - fields.push_back( - arrow::field(row_path_name, arrow::uint8())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::uint8()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_INT16: { - fields.push_back( - arrow::field(row_path_name, arrow::int16())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::int16()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_UINT16: { - fields.push_back( - arrow::field(row_path_name, arrow::uint16())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::uint16()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_INT32: { - fields.push_back( - arrow::field(row_path_name, arrow::int32())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::int32()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_UINT32: { - fields.push_back( - arrow::field(row_path_name, arrow::uint32())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::uint32()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_INT64: { - fields.push_back( - arrow::field(row_path_name, arrow::int64())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::int64()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_UINT64: { - fields.push_back( - arrow::field(row_path_name, arrow::uint64())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::uint64()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_FLOAT32: { - fields.push_back( - arrow::field(row_path_name, arrow::float32())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::float32()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_FLOAT64: { - fields.push_back( - arrow::field(row_path_name, arrow::float64())); - arr = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] + = arrow::field(row_path_name, arrow::float64()); + vectors[write_idx] + = apachearrow::numeric_col_to_array(extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_DATE: { - fields.push_back( - arrow::field(row_path_name, arrow::date32())); - arr = apachearrow::date_col_to_array( + fields[write_idx] + = arrow::field(row_path_name, arrow::date32()); + vectors[write_idx] = apachearrow::date_col_to_array( extents, [&, rpidx](t_uindex ridx) { auto depth = m_ctx->unity_get_row_depth(ridx); if (rpidx < depth) { @@ -785,9 +796,9 @@ View::data_slice_to_batches( }); } break; case DTYPE_TIME: { - fields.push_back(arrow::field(row_path_name, - arrow::timestamp(arrow::TimeUnit::MILLI))); - arr = apachearrow::timestamp_col_to_array( + fields[write_idx] = arrow::field(row_path_name, + arrow::timestamp(arrow::TimeUnit::MILLI)); + vectors[write_idx] = apachearrow::timestamp_col_to_array( extents, [&, rpidx](t_uindex ridx) { auto depth = m_ctx->unity_get_row_depth(ridx); if (rpidx < depth) { @@ -799,9 +810,9 @@ View::data_slice_to_batches( }); } break; case DTYPE_BOOL: { - fields.push_back( - arrow::field(row_path_name, arrow::boolean())); - arr = apachearrow::boolean_col_to_array( + fields[write_idx] + = arrow::field(row_path_name, arrow::boolean()); + vectors[write_idx] = apachearrow::boolean_col_to_array( extents, [&, rpidx](t_uindex ridx) { auto depth = m_ctx->unity_get_row_depth(ridx); if (rpidx < depth) { @@ -813,18 +824,19 @@ View::data_slice_to_batches( }); } break; case DTYPE_STR: { - fields.push_back(arrow::field(row_path_name, - arrow::dictionary(arrow::int32(), arrow::utf8()))); - arr = apachearrow::string_col_to_dictionary_array( - extents, [&, rpidx](t_uindex ridx) { - auto depth = m_ctx->unity_get_row_depth(ridx); - if (rpidx < depth) { - return m_ctx->unity_get_row_path(ridx).at( - (depth - 1) - rpidx); - } else { - return mknone(); - } - }); + fields[write_idx] = arrow::field(row_path_name, + arrow::dictionary(arrow::int32(), arrow::utf8())); + vectors[write_idx] + = apachearrow::string_col_to_dictionary_array( + extents, [&, rpidx](t_uindex ridx) { + auto depth = m_ctx->unity_get_row_depth(ridx); + if (rpidx < depth) { + return m_ctx->unity_get_row_path(ridx).at( + (depth - 1) - rpidx); + } else { + return mknone(); + } + }); } break; case DTYPE_OBJECT: default: { @@ -835,7 +847,8 @@ View::data_slice_to_batches( PSP_COMPLAIN_AND_ABORT(ss.str()); } } - vectors.push_back(arr); + + write_idx++; } } @@ -843,10 +856,10 @@ View::data_slice_to_batches( // the number of hidden sorts, so we can skip hidden sorts. // t_uindex num_view_columns = num_columns - m_hidden_sort.size(); t_uindex num_view_columns = m_columns.size(); - - for (auto cidx = start_col; cidx < end_col; ++cidx) { + std::vector indices; + for (auto tidx = 0; tidx < end_col - start_col; ++tidx) { + auto cidx = tidx + start_col; if (cidx == start_col && num_sides > 0) { - // TODO: write row_paths continue; } @@ -859,6 +872,16 @@ View::data_slice_to_batches( continue; } + indices.push_back(tidx); + } + + // TODO For some reason, this parallel call doesn't benefit from + // parallelism. + parallel_for(int(indices.size()), [&](auto iidx) { + // for (auto iidx = 0; iidx < indices.size(); iidx++) { + auto ccidx = iidx + num_output_row_paths; + auto cidx = indices[iidx] + start_col; + std::vector col_path = names.at(cidx); t_dtype dtype = get_column_dtype(cidx); @@ -882,125 +905,125 @@ View::data_slice_to_batches( std::shared_ptr arr; switch (dtype) { case DTYPE_INT8: { - fields.push_back(arrow::field(name, arrow::int8())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::int8()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_UINT8: { - fields.push_back(arrow::field(name, arrow::uint8())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::uint8()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_INT16: { - fields.push_back(arrow::field(name, arrow::int16())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::int16()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_UINT16: { - fields.push_back(arrow::field(name, arrow::uint16())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::uint16()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_INT32: { - fields.push_back(arrow::field(name, arrow::int32())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::int32()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_UINT32: { - fields.push_back(arrow::field(name, arrow::uint32())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::uint32()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_INT64: { - fields.push_back(arrow::field(name, arrow::int64())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::int64()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_UINT64: { - fields.push_back(arrow::field(name, arrow::uint64())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::uint64()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_FLOAT32: { - fields.push_back(arrow::field(name, arrow::float32())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::float32()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_FLOAT64: { - fields.push_back(arrow::field(name, arrow::float64())); - arr = apachearrow::numeric_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { - return slice[(ridx - extents.m_srow) * stride - + (cidx - extents.m_scol)]; - }); + fields[ccidx] = arrow::field(name, arrow::float64()); + vectors[ccidx] + = apachearrow::numeric_col_to_array(extents, [&](t_uindex ridx) { + return slice[(ridx - extents.m_srow) * stride + + (cidx - extents.m_scol)]; + }); } break; case DTYPE_DATE: { - fields.push_back(arrow::field(name, arrow::date32())); - arr = apachearrow::date_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { + fields[ccidx] = arrow::field(name, arrow::date32()); + vectors[ccidx] = apachearrow::date_col_to_array( + extents, [&](t_uindex ridx) { return slice[(ridx - extents.m_srow) * stride + (cidx - extents.m_scol)]; }); } break; case DTYPE_TIME: { - fields.push_back(arrow::field( - name, arrow::timestamp(arrow::TimeUnit::MILLI))); - arr = apachearrow::timestamp_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { + fields[ccidx] = arrow::field( + name, arrow::timestamp(arrow::TimeUnit::MILLI)); + vectors[ccidx] = apachearrow::timestamp_col_to_array( + extents, [&](t_uindex ridx) { return slice[(ridx - extents.m_srow) * stride + (cidx - extents.m_scol)]; }); } break; case DTYPE_BOOL: { - fields.push_back(arrow::field(name, arrow::boolean())); - arr = apachearrow::boolean_col_to_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { + fields[ccidx] = arrow::field(name, arrow::boolean()); + vectors[ccidx] = apachearrow::boolean_col_to_array( + extents, [&](t_uindex ridx) { return slice[(ridx - extents.m_srow) * stride + (cidx - extents.m_scol)]; }); } break; case DTYPE_STR: { - fields.push_back(arrow::field( - name, arrow::dictionary(arrow::int32(), arrow::utf8()))); - arr = apachearrow::string_col_to_dictionary_array( - extents, [slice, cidx, stride, extents](t_uindex ridx) { + fields[ccidx] = arrow::field( + name, arrow::dictionary(arrow::int32(), arrow::utf8())); + vectors[ccidx] = apachearrow::string_col_to_dictionary_array( + extents, [&](t_uindex ridx) { return slice[(ridx - extents.m_srow) * stride + (cidx - extents.m_scol)]; }); @@ -1014,8 +1037,11 @@ View::data_slice_to_batches( PSP_COMPLAIN_AND_ABORT(ss.str()); } } - vectors.push_back(arr); - } + }); + // } + + fields.resize(indices.size() + num_output_row_paths); + vectors.resize(indices.size() + num_output_row_paths); auto arrow_schema = arrow::schema(fields); auto num_rows = data_slice->num_rows(); From 81960c78c6c87a9d1e6a3d7acb5cfd419925440f Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Mon, 19 Jun 2023 19:13:30 -0400 Subject: [PATCH 10/16] Fix python thread flags --- cpp/perspective/CMakeLists.txt | 6 +++--- cpp/perspective/src/include/perspective/first.h | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index c14e9d5d78..afe1fbf3bc 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -281,7 +281,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) -g1 \ ") if (PSP_PYODIDE) - set(OPT_FLAGS "${OPT_FLAGS} -flto") + set(OPT_FLAGS "${OPT_FLAGS} -flto") endif () endif() endif() @@ -607,8 +607,8 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) include_directories(${PSP_PYTHON_SRC}/include) - target_compile_definitions(psp PRIVATE PSP_ENABLE_PYTHON=1) - target_compile_definitions(psppy PRIVATE PSP_ENABLE_PYTHON=1) + target_compile_definitions(psp PRIVATE PSP_ENABLE_PYTHON=1 PSP_PARALLEL_FOR=1) + target_compile_definitions(psppy PRIVATE PSP_ENABLE_PYTHON=1 PSP_PARALLEL_FOR=1) if(WIN32) target_compile_definitions(psppy PRIVATE WIN32=1) diff --git a/cpp/perspective/src/include/perspective/first.h b/cpp/perspective/src/include/perspective/first.h index 0c987df866..efd86c5627 100644 --- a/cpp/perspective/src/include/perspective/first.h +++ b/cpp/perspective/src/include/perspective/first.h @@ -7,14 +7,6 @@ * */ -#ifndef PSP_ENABLE_WASM -#ifdef PSP_ENABLE_PYTHON_THREADING -#ifndef PSP_PARALLEL_FOR -#define PSP_PARALLEL_FOR -#endif -#endif -#endif - #if !defined(__linux__) && !defined(__APPLE__) && !defined(WIN32) // default to linux #define __linux__ From 4dd25f128aceb4b7d1b6a2fd4737d67cf9a8e403 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Mon, 19 Jun 2023 19:21:27 -0400 Subject: [PATCH 11/16] Performance improvements for `to_arrow()` --- cpp/perspective/CMakeLists.txt | 1 + cpp/perspective/src/cpp/arrow_writer.cpp | 5 ----- cpp/perspective/src/cpp/emscripten.cpp | 4 ++-- cpp/perspective/src/cpp/view.cpp | 4 ++-- cpp/perspective/src/include/perspective/arrow_writer.h | 2 +- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index afe1fbf3bc..76ebffeb8c 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -279,6 +279,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) set(OPT_FLAGS " \ -O3 \ -g1 \ + -flto \ ") if (PSP_PYODIDE) set(OPT_FLAGS "${OPT_FLAGS} -flto") diff --git a/cpp/perspective/src/cpp/arrow_writer.cpp b/cpp/perspective/src/cpp/arrow_writer.cpp index 582190c759..947ede89e9 100644 --- a/cpp/perspective/src/cpp/arrow_writer.cpp +++ b/cpp/perspective/src/cpp/arrow_writer.cpp @@ -69,11 +69,6 @@ namespace apachearrow { get_scalar(t_tscalar& t) { return t.get(); } - template <> - std::string - get_scalar(t_tscalar& t) { - return t.to_string(); - } // std::int32_t // get_idx(std::int32_t cidx, std::int32_t ridx, std::int32_t stride, diff --git a/cpp/perspective/src/cpp/emscripten.cpp b/cpp/perspective/src/cpp/emscripten.cpp index 2da4804afd..2fca21d056 100644 --- a/cpp/perspective/src/cpp/emscripten.cpp +++ b/cpp/perspective/src/cpp/emscripten.cpp @@ -161,7 +161,7 @@ namespace binding { default: { std::wstring_convert converter( "", L""); - return t_val(converter.from_bytes(scalar.to_string())); + return t_val(converter.from_bytes(scalar.get())); } } } @@ -459,7 +459,7 @@ namespace binding { for (int idx = 0; idx < data_size; idx++) { t_tscalar scalar = data[idx]; if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) { - auto adx = vocab.get_interned(scalar.to_string()); + auto adx = vocab.get_interned(scalar.get()); indexArray.call("fill", t_val(adx), idx, idx + 1); validityMap[idx / 32] |= 1 << (idx % 32); } else { diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index 41bc4bf0f2..59d38dc60b 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -196,7 +196,7 @@ View::column_names(bool skip, std::int32_t depth) const { for (t_uindex key = 0, max = m_ctx->unity_get_column_count(); key != max; ++key) { t_tscalar name = m_ctx->get_column_name(key); - if (name.to_string() == "psp_okey") { + if (strcmp(name.get(), "psp_okey") == 0) { continue; }; std::vector col_path; @@ -215,7 +215,7 @@ View::column_names(bool skip, std::int32_t depth) const { for (t_uindex key = 0, max = m_ctx->unity_get_column_count(); key != max; ++key) { t_tscalar name = m_ctx->get_column_name(key); - if (name.to_string() == "psp_okey") { + if (strcmp(name.get(), "psp_okey") == 0) { continue; }; std::vector col_path; diff --git a/cpp/perspective/src/include/perspective/arrow_writer.h b/cpp/perspective/src/include/perspective/arrow_writer.h index 44256b90fd..588672e9d7 100644 --- a/cpp/perspective/src/include/perspective/arrow_writer.h +++ b/cpp/perspective/src/include/perspective/arrow_writer.h @@ -122,7 +122,7 @@ namespace apachearrow { // auto idx = get_idx(cidx, ridx, stride, extents); t_tscalar scalar = f(ridx); if (scalar.is_valid() && scalar.get_dtype() != DTYPE_NONE) { - auto adx = vocab.get_interned(scalar.to_string()); + auto adx = vocab.get_interned(scalar.get()); indices_builder.UnsafeAppend(adx); } else { indices_builder.UnsafeAppendNull(); From aaa6a8d506549ffae95e3c78e391f333d32016c0 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Mon, 19 Jun 2023 19:21:47 -0400 Subject: [PATCH 12/16] Fix benchmarks & examples --- examples/python-tornado-streaming/index.html | 8 +- examples/python-tornado-streaming/server.py | 10 +- .../python-tornado/client_server_editing.html | 8 +- examples/python-tornado/index.html | 8 +- examples/python-tornado/server.py | 3 +- examples/python-tornado/server_mode.html | 8 +- python/perspective/bench/runtime/bench.py | 39 ++-- .../bench/runtime/perspective_benchmark.py | 178 ++++-------------- .../runtime/run_perspective_benchmark.py | 48 +++-- .../perspective/bench/tornado/async_server.py | 2 +- 10 files changed, 120 insertions(+), 192 deletions(-) diff --git a/examples/python-tornado-streaming/index.html b/examples/python-tornado-streaming/index.html index 363829ae1e..0a7a24dfc4 100644 --- a/examples/python-tornado-streaming/index.html +++ b/examples/python-tornado-streaming/index.html @@ -12,11 +12,11 @@ - - - + + + - +