From 97e8a9baee996377eed5fadf1f2e822680119dac Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Tue, 3 Sep 2024 16:45:38 +0200 Subject: [PATCH] add performance regression testing to ci --- .github/regression/tpcds_sf1_local.csv | 100 +++++++ .github/regression/tpch_sf1_local.csv | 22 ++ .github/workflows/LocalTesting.yml | 76 +++++- benchmark/benchmark.Makefile | 25 +- benchmark/tpcds/sf1/local/delta/load.sql | 2 +- .../sf1/local/duckdb/tpcds_sf1.benchmark.in | 2 +- benchmark/tpcds/sf1/local/parquet/load.sql | 2 +- benchmark/tpch/sf1/delta-remote/load.sql | 8 - .../delta-remote/tpch_sf1_delta.benchmark.in | 19 -- benchmark/tpch/sf1/delta/q01.benchmark | 7 - benchmark/tpch/sf1/delta/q02.benchmark | 7 - benchmark/tpch/sf1/delta/q03.benchmark | 7 - benchmark/tpch/sf1/delta/q04.benchmark | 7 - benchmark/tpch/sf1/delta/q05.benchmark | 7 - benchmark/tpch/sf1/delta/q06.benchmark | 7 - benchmark/tpch/sf1/delta/q07.benchmark | 7 - benchmark/tpch/sf1/delta/q08.benchmark | 7 - benchmark/tpch/sf1/delta/q09.benchmark | 7 - benchmark/tpch/sf1/delta/q10.benchmark | 7 - benchmark/tpch/sf1/delta/q11.benchmark | 7 - benchmark/tpch/sf1/delta/q12.benchmark | 7 - benchmark/tpch/sf1/delta/q13.benchmark | 7 - benchmark/tpch/sf1/delta/q14.benchmark | 7 - benchmark/tpch/sf1/delta/q15.benchmark | 7 - benchmark/tpch/sf1/delta/q16.benchmark | 7 - benchmark/tpch/sf1/delta/q17.benchmark | 7 - benchmark/tpch/sf1/delta/q18.benchmark | 7 - benchmark/tpch/sf1/delta/q19.benchmark | 7 - benchmark/tpch/sf1/delta/q20.benchmark | 7 - benchmark/tpch/sf1/delta/q21.benchmark | 7 - benchmark/tpch/sf1/delta/q22.benchmark | 7 - benchmark/tpch/sf1/{ => local}/delta/load.sql | 0 benchmark/tpch/sf1/local/delta/q01.benchmark | 7 + benchmark/tpch/sf1/local/delta/q02.benchmark | 7 + benchmark/tpch/sf1/local/delta/q03.benchmark | 7 + benchmark/tpch/sf1/local/delta/q04.benchmark | 7 + benchmark/tpch/sf1/local/delta/q05.benchmark | 7 + benchmark/tpch/sf1/local/delta/q06.benchmark | 7 + 
benchmark/tpch/sf1/local/delta/q07.benchmark | 7 + benchmark/tpch/sf1/local/delta/q08.benchmark | 7 + benchmark/tpch/sf1/local/delta/q09.benchmark | 7 + benchmark/tpch/sf1/local/delta/q10.benchmark | 7 + benchmark/tpch/sf1/local/delta/q11.benchmark | 7 + benchmark/tpch/sf1/local/delta/q12.benchmark | 7 + benchmark/tpch/sf1/local/delta/q13.benchmark | 7 + benchmark/tpch/sf1/local/delta/q14.benchmark | 7 + benchmark/tpch/sf1/local/delta/q15.benchmark | 7 + benchmark/tpch/sf1/local/delta/q16.benchmark | 7 + benchmark/tpch/sf1/local/delta/q17.benchmark | 7 + benchmark/tpch/sf1/local/delta/q18.benchmark | 7 + benchmark/tpch/sf1/local/delta/q19.benchmark | 7 + benchmark/tpch/sf1/local/delta/q20.benchmark | 7 + benchmark/tpch/sf1/local/delta/q21.benchmark | 7 + benchmark/tpch/sf1/local/delta/q22.benchmark | 7 + .../delta/tpch_sf1_delta.benchmark.in | 2 +- .../duckdb}/q01.benchmark | 4 +- .../duckdb}/q02.benchmark | 4 +- .../duckdb}/q03.benchmark | 4 +- .../duckdb}/q04.benchmark | 4 +- .../duckdb}/q05.benchmark | 4 +- .../duckdb}/q06.benchmark | 4 +- .../duckdb}/q07.benchmark | 4 +- .../duckdb}/q08.benchmark | 4 +- .../duckdb}/q09.benchmark | 4 +- .../duckdb}/q10.benchmark | 4 +- .../duckdb}/q11.benchmark | 4 +- .../duckdb}/q12.benchmark | 4 +- .../duckdb}/q13.benchmark | 4 +- .../duckdb}/q14.benchmark | 4 +- .../duckdb}/q15.benchmark | 4 +- .../duckdb}/q16.benchmark | 4 +- .../duckdb}/q17.benchmark | 4 +- .../duckdb}/q18.benchmark | 4 +- .../duckdb}/q19.benchmark | 4 +- .../duckdb}/q20.benchmark | 4 +- .../duckdb}/q21.benchmark | 4 +- .../duckdb}/q22.benchmark | 4 +- .../local/duckdb/tpch_sf1_delta.benchmark.in | 24 ++ benchmark/tpch/sf1/parquet-remote/load.sql | 8 - .../tpch/sf1/parquet-remote/q01.benchmark | 7 - .../tpch/sf1/parquet-remote/q02.benchmark | 7 - .../tpch/sf1/parquet-remote/q03.benchmark | 7 - .../tpch/sf1/parquet-remote/q04.benchmark | 7 - .../tpch/sf1/parquet-remote/q05.benchmark | 7 - .../tpch/sf1/parquet-remote/q06.benchmark | 7 - 
.../tpch/sf1/parquet-remote/q07.benchmark | 7 - .../tpch/sf1/parquet-remote/q08.benchmark | 7 - .../tpch/sf1/parquet-remote/q09.benchmark | 7 - .../tpch/sf1/parquet-remote/q10.benchmark | 7 - .../tpch/sf1/parquet-remote/q11.benchmark | 7 - .../tpch/sf1/parquet-remote/q12.benchmark | 7 - .../tpch/sf1/parquet-remote/q13.benchmark | 7 - .../tpch/sf1/parquet-remote/q14.benchmark | 7 - .../tpch/sf1/parquet-remote/q15.benchmark | 7 - .../tpch/sf1/parquet-remote/q16.benchmark | 7 - .../tpch/sf1/parquet-remote/q17.benchmark | 7 - .../tpch/sf1/parquet-remote/q18.benchmark | 7 - .../tpch/sf1/parquet-remote/q19.benchmark | 7 - .../tpch/sf1/parquet-remote/q20.benchmark | 7 - .../tpch/sf1/parquet-remote/q21.benchmark | 7 - .../tpch/sf1/parquet-remote/q22.benchmark | 7 - .../tpch_sf1_delta.benchmark.in | 19 -- scripts/generate_test_data.py | 213 ++++++--------- scripts/regression_test_runner.py | 254 ++++++++++++++++++ 104 files changed, 768 insertions(+), 558 deletions(-) create mode 100644 .github/regression/tpcds_sf1_local.csv create mode 100644 .github/regression/tpch_sf1_local.csv delete mode 100644 benchmark/tpch/sf1/delta-remote/load.sql delete mode 100644 benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in delete mode 100644 benchmark/tpch/sf1/delta/q01.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q02.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q03.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q04.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q05.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q06.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q07.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q08.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q09.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q10.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q11.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q12.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q13.benchmark delete mode 
100644 benchmark/tpch/sf1/delta/q14.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q15.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q16.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q17.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q18.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q19.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q20.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q21.benchmark delete mode 100644 benchmark/tpch/sf1/delta/q22.benchmark rename benchmark/tpch/sf1/{ => local}/delta/load.sql (100%) create mode 100644 benchmark/tpch/sf1/local/delta/q01.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q02.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q03.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q04.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q05.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q06.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q07.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q08.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q09.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q10.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q11.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q12.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q13.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q14.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q15.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q16.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q17.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q18.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q19.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q20.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q21.benchmark create mode 100644 benchmark/tpch/sf1/local/delta/q22.benchmark rename 
benchmark/tpch/sf1/{ => local}/delta/tpch_sf1_delta.benchmark.in (86%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q01.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q02.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q03.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q04.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q05.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q06.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q07.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q08.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q09.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q10.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q11.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q12.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q13.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q14.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q15.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q16.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q17.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q18.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q19.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q20.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q21.benchmark (50%) rename benchmark/tpch/sf1/{delta-remote => local/duckdb}/q22.benchmark (50%) create mode 100644 benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in delete mode 100644 benchmark/tpch/sf1/parquet-remote/load.sql delete mode 100644 benchmark/tpch/sf1/parquet-remote/q01.benchmark delete mode 100644 
benchmark/tpch/sf1/parquet-remote/q02.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q03.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q04.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q05.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q06.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q07.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q08.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q09.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q10.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q11.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q12.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q13.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q14.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q15.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q16.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q17.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q18.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q19.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q20.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q21.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/q22.benchmark delete mode 100644 benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in create mode 100644 scripts/regression_test_runner.py diff --git a/.github/regression/tpcds_sf1_local.csv b/.github/regression/tpcds_sf1_local.csv new file mode 100644 index 0000000..5eef84b --- /dev/null +++ b/.github/regression/tpcds_sf1_local.csv @@ -0,0 +1,100 @@ +benchmark/tpcds/sf1/local/delta/q01.benchmark +benchmark/tpcds/sf1/local/delta/q02.benchmark +benchmark/tpcds/sf1/local/delta/q03.benchmark +benchmark/tpcds/sf1/local/delta/q04.benchmark +benchmark/tpcds/sf1/local/delta/q05.benchmark 
+benchmark/tpcds/sf1/local/delta/q06.benchmark +benchmark/tpcds/sf1/local/delta/q07.benchmark +benchmark/tpcds/sf1/local/delta/q08.benchmark +benchmark/tpcds/sf1/local/delta/q09.benchmark +benchmark/tpcds/sf1/local/delta/q10.benchmark +benchmark/tpcds/sf1/local/delta/q11.benchmark +benchmark/tpcds/sf1/local/delta/q12.benchmark +benchmark/tpcds/sf1/local/delta/q13.benchmark +benchmark/tpcds/sf1/local/delta/q14.benchmark +benchmark/tpcds/sf1/local/delta/q15.benchmark +benchmark/tpcds/sf1/local/delta/q16.benchmark +benchmark/tpcds/sf1/local/delta/q17.benchmark +benchmark/tpcds/sf1/local/delta/q18.benchmark +benchmark/tpcds/sf1/local/delta/q19.benchmark +benchmark/tpcds/sf1/local/delta/q20.benchmark +benchmark/tpcds/sf1/local/delta/q21.benchmark +benchmark/tpcds/sf1/local/delta/q22.benchmark +benchmark/tpcds/sf1/local/delta/q23.benchmark +benchmark/tpcds/sf1/local/delta/q24.benchmark +benchmark/tpcds/sf1/local/delta/q25.benchmark +benchmark/tpcds/sf1/local/delta/q26.benchmark +benchmark/tpcds/sf1/local/delta/q27.benchmark +benchmark/tpcds/sf1/local/delta/q28.benchmark + +benchmark/tpcds/sf1/local/delta/q29.benchmark +benchmark/tpcds/sf1/local/delta/q30.benchmark +benchmark/tpcds/sf1/local/delta/q31.benchmark +benchmark/tpcds/sf1/local/delta/q32.benchmark +benchmark/tpcds/sf1/local/delta/q33.benchmark +benchmark/tpcds/sf1/local/delta/q34.benchmark +benchmark/tpcds/sf1/local/delta/q35.benchmark +benchmark/tpcds/sf1/local/delta/q36.benchmark +benchmark/tpcds/sf1/local/delta/q37.benchmark +benchmark/tpcds/sf1/local/delta/q38.benchmark +benchmark/tpcds/sf1/local/delta/q39.benchmark +benchmark/tpcds/sf1/local/delta/q40.benchmark +benchmark/tpcds/sf1/local/delta/q41.benchmark +benchmark/tpcds/sf1/local/delta/q42.benchmark +benchmark/tpcds/sf1/local/delta/q43.benchmark +benchmark/tpcds/sf1/local/delta/q44.benchmark +benchmark/tpcds/sf1/local/delta/q45.benchmark +benchmark/tpcds/sf1/local/delta/q46.benchmark +benchmark/tpcds/sf1/local/delta/q47.benchmark 
+benchmark/tpcds/sf1/local/delta/q48.benchmark +benchmark/tpcds/sf1/local/delta/q49.benchmark +benchmark/tpcds/sf1/local/delta/q50.benchmark +benchmark/tpcds/sf1/local/delta/q51.benchmark +benchmark/tpcds/sf1/local/delta/q52.benchmark +benchmark/tpcds/sf1/local/delta/q53.benchmark +benchmark/tpcds/sf1/local/delta/q54.benchmark +benchmark/tpcds/sf1/local/delta/q55.benchmark +benchmark/tpcds/sf1/local/delta/q56.benchmark +benchmark/tpcds/sf1/local/delta/q57.benchmark +benchmark/tpcds/sf1/local/delta/q58.benchmark +benchmark/tpcds/sf1/local/delta/q59.benchmark +benchmark/tpcds/sf1/local/delta/q60.benchmark +benchmark/tpcds/sf1/local/delta/q61.benchmark +benchmark/tpcds/sf1/local/delta/q62.benchmark +benchmark/tpcds/sf1/local/delta/q63.benchmark +benchmark/tpcds/sf1/local/delta/q64.benchmark +benchmark/tpcds/sf1/local/delta/q65.benchmark +benchmark/tpcds/sf1/local/delta/q66.benchmark +benchmark/tpcds/sf1/local/delta/q67.benchmark +benchmark/tpcds/sf1/local/delta/q68.benchmark +benchmark/tpcds/sf1/local/delta/q69.benchmark +benchmark/tpcds/sf1/local/delta/q70.benchmark +benchmark/tpcds/sf1/local/delta/q71.benchmark +benchmark/tpcds/sf1/local/delta/q72.benchmark +benchmark/tpcds/sf1/local/delta/q73.benchmark +benchmark/tpcds/sf1/local/delta/q74.benchmark +benchmark/tpcds/sf1/local/delta/q75.benchmark +benchmark/tpcds/sf1/local/delta/q76.benchmark +benchmark/tpcds/sf1/local/delta/q77.benchmark +benchmark/tpcds/sf1/local/delta/q78.benchmark +benchmark/tpcds/sf1/local/delta/q79.benchmark +benchmark/tpcds/sf1/local/delta/q80.benchmark +benchmark/tpcds/sf1/local/delta/q81.benchmark +benchmark/tpcds/sf1/local/delta/q82.benchmark +benchmark/tpcds/sf1/local/delta/q83.benchmark +benchmark/tpcds/sf1/local/delta/q84.benchmark +benchmark/tpcds/sf1/local/delta/q85.benchmark +benchmark/tpcds/sf1/local/delta/q86.benchmark +benchmark/tpcds/sf1/local/delta/q87.benchmark +benchmark/tpcds/sf1/local/delta/q88.benchmark +benchmark/tpcds/sf1/local/delta/q89.benchmark 
+benchmark/tpcds/sf1/local/delta/q90.benchmark +benchmark/tpcds/sf1/local/delta/q91.benchmark +benchmark/tpcds/sf1/local/delta/q92.benchmark +benchmark/tpcds/sf1/local/delta/q93.benchmark +benchmark/tpcds/sf1/local/delta/q94.benchmark +benchmark/tpcds/sf1/local/delta/q95.benchmark +benchmark/tpcds/sf1/local/delta/q96.benchmark +benchmark/tpcds/sf1/local/delta/q97.benchmark +benchmark/tpcds/sf1/local/delta/q98.benchmark +benchmark/tpcds/sf1/local/delta/q99.benchmark diff --git a/.github/regression/tpch_sf1_local.csv b/.github/regression/tpch_sf1_local.csv new file mode 100644 index 0000000..f98745c --- /dev/null +++ b/.github/regression/tpch_sf1_local.csv @@ -0,0 +1,22 @@ +benchmark/tpch/sf1/local/delta/q01.benchmark +benchmark/tpch/sf1/local/delta/q02.benchmark +benchmark/tpch/sf1/local/delta/q03.benchmark +benchmark/tpch/sf1/local/delta/q04.benchmark +benchmark/tpch/sf1/local/delta/q05.benchmark +benchmark/tpch/sf1/local/delta/q06.benchmark +benchmark/tpch/sf1/local/delta/q07.benchmark +benchmark/tpch/sf1/local/delta/q08.benchmark +benchmark/tpch/sf1/local/delta/q09.benchmark +benchmark/tpch/sf1/local/delta/q10.benchmark +benchmark/tpch/sf1/local/delta/q11.benchmark +benchmark/tpch/sf1/local/delta/q12.benchmark +benchmark/tpch/sf1/local/delta/q13.benchmark +benchmark/tpch/sf1/local/delta/q14.benchmark +benchmark/tpch/sf1/local/delta/q15.benchmark +benchmark/tpch/sf1/local/delta/q16.benchmark +benchmark/tpch/sf1/local/delta/q17.benchmark +benchmark/tpch/sf1/local/delta/q18.benchmark +benchmark/tpch/sf1/local/delta/q19.benchmark +benchmark/tpch/sf1/local/delta/q20.benchmark +benchmark/tpch/sf1/local/delta/q21.benchmark +benchmark/tpch/sf1/local/delta/q22.benchmark diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 5b6a4cb..1a5aa4e 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -7,6 +7,9 @@ defaults: run: shell: bash +env: + BASE_BRANCH: ${{ github.base_ref || (endsWith(github.ref, '_feature') && 'feature' || 'main') }} + jobs: azurite-tests-linux: name: Azurite (local azure test server) tests (Linux) @@ -213,4 
+216,75 @@ jobs: - name: Test shell: bash run: | - GENERATED_DATA_AVAILABLE=1 make test \ No newline at end of file + GENERATED_DATA_AVAILABLE=1 make test + + regression-test-benchmark-runner: + name: Performance Regression Tests + runs-on: ubuntu-latest + env: + GEN: ninja + BUILD_BENCHMARK: 1 + VCPKG_TARGET_TRIPLET: x64-linux + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build && pip install requests + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - name: Configure OpenSSL for Rust + run: | + echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV + echo "OPENSSL_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV + echo "OPENSSL_USE_STATIC_LIBS=true" >> $GITHUB_ENV + + - name: Build + shell: bash + run: | + make + git clone --branch ${{ env.BASE_BRANCH }} https://github.com/duckdb/duckdb_delta.git --depth=1 + cd duckdb_delta + git submodule init + git submodule update + make + cd .. + + - name: Generate test data + shell: bash + run: make generate-data + + - name: Regression Test TPC-H + if: always() + shell: bash + run: | + python3 duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpch_sf1_local.csv --verbose --threads=2 --root-dir=. 
+ + - name: Regression Test TPC-DS + if: always() + shell: bash + run: | + python duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds_sf1_local.csv --verbose --threads=2 --root-dir=. + + - name: Test benchmark makefile + shell: bash + run: | + make bench-run-tpch-sf1 + make bench-run-tpcds-sf1 \ No newline at end of file diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index 7453e7e..9766ef1 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -8,7 +8,6 @@ ifeq ("$(IO_MODE)a", "a") IO_MODE:=local endif - bench-output-dir: mkdir -p benchmark_results @@ -18,38 +17,24 @@ clean_benchmark: plot: python3 scripts/plot.py - ############### BENCHMARK TARGETS ############### ### -# TPCH LOCAL +# TPCH ### # TPCH SF1 on delta table bench-run-tpch-sf1-delta: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/delta/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-delta.csv + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/local/delta/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-delta.csv # TPCH SF1 on parquet files bench-run-tpch-sf1-parquet: bench-output-dir - ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1/parquet/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-parquet.csv + ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-parquet.csv # TPCH SF1 on duckdb file bench-run-tpch-sf1-duckdb: bench-output-dir - ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-duckdb.csv -# COMPARES TPCH SF1 on parquet file vs on delta files + ./build/release/benchmark/benchmark_runner --root-dir './' 
'benchmark/tpch/sf1/local/duckdb/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-duckdb.csv +# COMPARES TPCH SF1 on parquet file vs on delta files vs on duckdb files bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet -### -# TPCH REMOTE -### - -# TPCH on remote delta table (set BENCHMARK_DATA_S3_LINEITEM_SF1) -bench-run-tpch-sf1-remote-delta: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/delta-remote/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-remote-delta.csv -# TPCH on remote parquet table (set BENCHMARK_DATA_S3_LINEITEM_SF1) -bench-run-tpch-sf1-remote-parquet: bench-output-dir - ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/parquet-remote/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-remote-parquet.csv -# COMPARES TPCH SF1 on parquet file vs on delta files -bench-run-tpch-sf1-remote: bench-run-tpch-sf1-remote-parquet bench-run-tpch-sf1-remote-delta - ### # TPCDS ### diff --git a/benchmark/tpcds/sf1/local/delta/load.sql b/benchmark/tpcds/sf1/local/delta/load.sql index 1d87455..ecb2c2b 100644 --- a/benchmark/tpcds/sf1/local/delta/load.sql +++ b/benchmark/tpcds/sf1/local/delta/load.sql @@ -1,4 +1,4 @@ -SET VARIABLE delta_path = '/mount/delta_benchmarking/tpcds_sf1_pyspark'; +SET VARIABLE delta_path = './data/generated/tpcds_sf1'; create view call_center as from delta_scan(getvariable('delta_path') || '/call_center/delta_lake'); create view catalog_page as from delta_scan(getvariable('delta_path') || '/catalog_page/delta_lake'); diff --git a/benchmark/tpcds/sf1/local/duckdb/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1/local/duckdb/tpcds_sf1.benchmark.in index 992983a..052396f 100644 --- a/benchmark/tpcds/sf1/local/duckdb/tpcds_sf1.benchmark.in +++ b/benchmark/tpcds/sf1/local/duckdb/tpcds_sf1.benchmark.in @@ -13,7 +13,7 @@ require parquet require tpcds run -attach '/mount/delta_benchmarking/tpcds_sf1_pyspark/duckdb.db' as 
tpcds; +attach './data/generated/tpcds_sf1/duckdb.db' as tpcds; use tpcds; pragma tpcds(${QUERY_NUMBER}) diff --git a/benchmark/tpcds/sf1/local/parquet/load.sql b/benchmark/tpcds/sf1/local/parquet/load.sql index 318e6a7..94a8da4 100644 --- a/benchmark/tpcds/sf1/local/parquet/load.sql +++ b/benchmark/tpcds/sf1/local/parquet/load.sql @@ -1,4 +1,4 @@ -SET VARIABLE parquet_path = '/mount/delta_benchmarking/tpcds_sf1_pyspark'; +SET VARIABLE parquet_path = './data/generated/tpcds_sf1'; create view call_center as from parquet_scan(getvariable('parquet_path') || '/call_center/parquet/**/*.parquet'); create view catalog_page as from parquet_scan(getvariable('parquet_path') || '/catalog_page/parquet/**/*.parquet'); diff --git a/benchmark/tpch/sf1/delta-remote/load.sql b/benchmark/tpch/sf1/delta-remote/load.sql deleted file mode 100644 index a095ffd..0000000 --- a/benchmark/tpch/sf1/delta-remote/load.sql +++ /dev/null @@ -1,8 +0,0 @@ -create view customer as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/delta_lake'); -create view lineitem as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/lineitem/delta_lake'); -create view nation as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/delta_lake'); -create view orders as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/orders/delta_lake'); -create view part as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/delta_lake'); -create view partsupp as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/delta_lake'); -create view region as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/delta_lake'); -create view supplier as from delta_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/delta_lake'); \ No newline at end of file diff --git 
a/benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in deleted file mode 100644 index feefacc..0000000 --- a/benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +++ /dev/null @@ -1,19 +0,0 @@ -# name: ${FILE_PATH} -# description: ${DESCRIPTION} -# group: [sf1] - -name Q${QUERY_NUMBER_PADDED} -group tpch -subgroup sf1 - -require delta - -require parquet - -require httpfs - -load benchmark/tpch/sf1/delta-remote/load.sql - -run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql - -result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git a/benchmark/tpch/sf1/delta/q01.benchmark b/benchmark/tpch/sf1/delta/q01.benchmark deleted file mode 100644 index 4e3eac8..0000000 --- a/benchmark/tpch/sf1/delta/q01.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q01.benchmark -# description: Run query 01 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=1 -QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1/delta/q02.benchmark b/benchmark/tpch/sf1/delta/q02.benchmark deleted file mode 100644 index 38697ce..0000000 --- a/benchmark/tpch/sf1/delta/q02.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q02.benchmark -# description: Run query 02 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=2 -QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1/delta/q03.benchmark b/benchmark/tpch/sf1/delta/q03.benchmark deleted file mode 100644 index 3ab273e..0000000 --- a/benchmark/tpch/sf1/delta/q03.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q03.benchmark -# description: Run query 03 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=3 
-QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1/delta/q04.benchmark b/benchmark/tpch/sf1/delta/q04.benchmark deleted file mode 100644 index 6c19561..0000000 --- a/benchmark/tpch/sf1/delta/q04.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q04.benchmark -# description: Run query 04 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=4 -QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1/delta/q05.benchmark b/benchmark/tpch/sf1/delta/q05.benchmark deleted file mode 100644 index fc2b61b..0000000 --- a/benchmark/tpch/sf1/delta/q05.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q05.benchmark -# description: Run query 05 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=5 -QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1/delta/q06.benchmark b/benchmark/tpch/sf1/delta/q06.benchmark deleted file mode 100644 index b9b1dd4..0000000 --- a/benchmark/tpch/sf1/delta/q06.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q06.benchmark -# description: Run query 06 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=6 -QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1/delta/q07.benchmark b/benchmark/tpch/sf1/delta/q07.benchmark deleted file mode 100644 index 2a38953..0000000 --- a/benchmark/tpch/sf1/delta/q07.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q07.benchmark -# description: Run query 07 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=7 -QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1/delta/q08.benchmark b/benchmark/tpch/sf1/delta/q08.benchmark deleted file mode 100644 index 6321d19..0000000 --- a/benchmark/tpch/sf1/delta/q08.benchmark 
+++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q08.benchmark -# description: Run query 08 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=8 -QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1/delta/q09.benchmark b/benchmark/tpch/sf1/delta/q09.benchmark deleted file mode 100644 index 9ca116e..0000000 --- a/benchmark/tpch/sf1/delta/q09.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q09.benchmark -# description: Run query 09 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=9 -QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1/delta/q10.benchmark b/benchmark/tpch/sf1/delta/q10.benchmark deleted file mode 100644 index 9ffde79..0000000 --- a/benchmark/tpch/sf1/delta/q10.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q10.benchmark -# description: Run query 10 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=10 -QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1/delta/q11.benchmark b/benchmark/tpch/sf1/delta/q11.benchmark deleted file mode 100644 index fb0a839..0000000 --- a/benchmark/tpch/sf1/delta/q11.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q11.benchmark -# description: Run query 11 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=11 -QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1/delta/q12.benchmark b/benchmark/tpch/sf1/delta/q12.benchmark deleted file mode 100644 index 79df5ae..0000000 --- a/benchmark/tpch/sf1/delta/q12.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q12.benchmark -# description: Run query 12 from the TPC-H benchmark -# group: [sf1-parquet] - -template 
benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=12 -QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1/delta/q13.benchmark b/benchmark/tpch/sf1/delta/q13.benchmark deleted file mode 100644 index 8ace247..0000000 --- a/benchmark/tpch/sf1/delta/q13.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q13.benchmark -# description: Run query 13 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=13 -QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1/delta/q14.benchmark b/benchmark/tpch/sf1/delta/q14.benchmark deleted file mode 100644 index abe9797..0000000 --- a/benchmark/tpch/sf1/delta/q14.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q14.benchmark -# description: Run query 14 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=14 -QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1/delta/q15.benchmark b/benchmark/tpch/sf1/delta/q15.benchmark deleted file mode 100644 index b832d07..0000000 --- a/benchmark/tpch/sf1/delta/q15.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q15.benchmark -# description: Run query 15 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=15 -QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1/delta/q16.benchmark b/benchmark/tpch/sf1/delta/q16.benchmark deleted file mode 100644 index 0be13c7..0000000 --- a/benchmark/tpch/sf1/delta/q16.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q16.benchmark -# description: Run query 16 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=16 -QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1/delta/q17.benchmark b/benchmark/tpch/sf1/delta/q17.benchmark deleted file mode 
100644 index 79c79c1..0000000 --- a/benchmark/tpch/sf1/delta/q17.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q17.benchmark -# description: Run query 17 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=17 -QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1/delta/q18.benchmark b/benchmark/tpch/sf1/delta/q18.benchmark deleted file mode 100644 index 98f5523..0000000 --- a/benchmark/tpch/sf1/delta/q18.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q18.benchmark -# description: Run query 18 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=18 -QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1/delta/q19.benchmark b/benchmark/tpch/sf1/delta/q19.benchmark deleted file mode 100644 index 08ce29a..0000000 --- a/benchmark/tpch/sf1/delta/q19.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q19.benchmark -# description: Run query 19 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=19 -QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1/delta/q20.benchmark b/benchmark/tpch/sf1/delta/q20.benchmark deleted file mode 100644 index 6bd5b91..0000000 --- a/benchmark/tpch/sf1/delta/q20.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q20.benchmark -# description: Run query 20 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=20 -QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1/delta/q21.benchmark b/benchmark/tpch/sf1/delta/q21.benchmark deleted file mode 100644 index b62ea2d..0000000 --- a/benchmark/tpch/sf1/delta/q21.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q21.benchmark -# description: Run query 21 from the TPC-H benchmark 
-# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=21 -QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1/delta/q22.benchmark b/benchmark/tpch/sf1/delta/q22.benchmark deleted file mode 100644 index 11c6dc6..0000000 --- a/benchmark/tpch/sf1/delta/q22.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1/delta/q22.benchmark -# description: Run query 22 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=22 -QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1/delta/load.sql b/benchmark/tpch/sf1/local/delta/load.sql similarity index 100% rename from benchmark/tpch/sf1/delta/load.sql rename to benchmark/tpch/sf1/local/delta/load.sql diff --git a/benchmark/tpch/sf1/local/delta/q01.benchmark b/benchmark/tpch/sf1/local/delta/q01.benchmark new file mode 100644 index 0000000..908a337 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q01.benchmark +# description: Run query 01 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1/local/delta/q02.benchmark b/benchmark/tpch/sf1/local/delta/q02.benchmark new file mode 100644 index 0000000..3225d7e --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q02.benchmark +# description: Run query 02 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1/local/delta/q03.benchmark b/benchmark/tpch/sf1/local/delta/q03.benchmark new file mode 100644 index 0000000..6ff95b7 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q03.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpch/sf1/local/delta/q03.benchmark +# description: Run query 03 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1/local/delta/q04.benchmark b/benchmark/tpch/sf1/local/delta/q04.benchmark new file mode 100644 index 0000000..08233ff --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q04.benchmark +# description: Run query 04 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1/local/delta/q05.benchmark b/benchmark/tpch/sf1/local/delta/q05.benchmark new file mode 100644 index 0000000..61177d9 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q05.benchmark +# description: Run query 05 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1/local/delta/q06.benchmark b/benchmark/tpch/sf1/local/delta/q06.benchmark new file mode 100644 index 0000000..56b374e --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q06.benchmark +# description: Run query 06 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1/local/delta/q07.benchmark b/benchmark/tpch/sf1/local/delta/q07.benchmark new file mode 100644 index 0000000..bc4d19f --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q07.benchmark +# description: Run query 07 from the TPC-H benchmark +# 
group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1/local/delta/q08.benchmark b/benchmark/tpch/sf1/local/delta/q08.benchmark new file mode 100644 index 0000000..b6ad1d9 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q08.benchmark +# description: Run query 08 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1/local/delta/q09.benchmark b/benchmark/tpch/sf1/local/delta/q09.benchmark new file mode 100644 index 0000000..a986686 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q09.benchmark +# description: Run query 09 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1/local/delta/q10.benchmark b/benchmark/tpch/sf1/local/delta/q10.benchmark new file mode 100644 index 0000000..ba2655e --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q10.benchmark +# description: Run query 10 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1/local/delta/q11.benchmark b/benchmark/tpch/sf1/local/delta/q11.benchmark new file mode 100644 index 0000000..c590175 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q11.benchmark +# description: Run query 11 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in 
+QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1/local/delta/q12.benchmark b/benchmark/tpch/sf1/local/delta/q12.benchmark new file mode 100644 index 0000000..74b79d8 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q12.benchmark +# description: Run query 12 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1/local/delta/q13.benchmark b/benchmark/tpch/sf1/local/delta/q13.benchmark new file mode 100644 index 0000000..859c301 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q13.benchmark +# description: Run query 13 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1/local/delta/q14.benchmark b/benchmark/tpch/sf1/local/delta/q14.benchmark new file mode 100644 index 0000000..7d7c8c3 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q14.benchmark +# description: Run query 14 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1/local/delta/q15.benchmark b/benchmark/tpch/sf1/local/delta/q15.benchmark new file mode 100644 index 0000000..82110b5 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q15.benchmark +# description: Run query 15 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1/local/delta/q16.benchmark 
b/benchmark/tpch/sf1/local/delta/q16.benchmark new file mode 100644 index 0000000..f90bc19 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q16.benchmark +# description: Run query 16 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1/local/delta/q17.benchmark b/benchmark/tpch/sf1/local/delta/q17.benchmark new file mode 100644 index 0000000..e489990 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q17.benchmark +# description: Run query 17 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1/local/delta/q18.benchmark b/benchmark/tpch/sf1/local/delta/q18.benchmark new file mode 100644 index 0000000..d47cf87 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q18.benchmark +# description: Run query 18 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1/local/delta/q19.benchmark b/benchmark/tpch/sf1/local/delta/q19.benchmark new file mode 100644 index 0000000..141ccb1 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q19.benchmark +# description: Run query 19 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1/local/delta/q20.benchmark b/benchmark/tpch/sf1/local/delta/q20.benchmark new file mode 100644 index 0000000..e1cbb9d --- 
/dev/null +++ b/benchmark/tpch/sf1/local/delta/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q20.benchmark +# description: Run query 20 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1/local/delta/q21.benchmark b/benchmark/tpch/sf1/local/delta/q21.benchmark new file mode 100644 index 0000000..73eb0bd --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q21.benchmark +# description: Run query 21 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1/local/delta/q22.benchmark b/benchmark/tpch/sf1/local/delta/q22.benchmark new file mode 100644 index 0000000..389b11a --- /dev/null +++ b/benchmark/tpch/sf1/local/delta/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta/q22.benchmark +# description: Run query 22 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in similarity index 86% rename from benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in rename to benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in index a4b066a..256c309 100644 --- a/benchmark/tpch/sf1/delta/tpch_sf1_delta.benchmark.in +++ b/benchmark/tpch/sf1/local/delta/tpch_sf1_delta.benchmark.in @@ -10,7 +10,7 @@ require delta require parquet -load benchmark/tpch/sf1/delta/load.sql +load benchmark/tpch/sf1/local/delta/load.sql run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/tpch/sf1/delta-remote/q01.benchmark 
b/benchmark/tpch/sf1/local/duckdb/q01.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q01.benchmark rename to benchmark/tpch/sf1/local/duckdb/q01.benchmark index 8e0fead..6e1a0c4 100644 --- a/benchmark/tpch/sf1/delta-remote/q01.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q01.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q01.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q01.benchmark # description: Run query 01 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=1 QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1/delta-remote/q02.benchmark b/benchmark/tpch/sf1/local/duckdb/q02.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q02.benchmark rename to benchmark/tpch/sf1/local/duckdb/q02.benchmark index ec28d85..e8a2635 100644 --- a/benchmark/tpch/sf1/delta-remote/q02.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q02.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q02.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q02.benchmark # description: Run query 02 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=2 QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1/delta-remote/q03.benchmark b/benchmark/tpch/sf1/local/duckdb/q03.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q03.benchmark rename to benchmark/tpch/sf1/local/duckdb/q03.benchmark index bbe7e78..91bec2a 100644 --- a/benchmark/tpch/sf1/delta-remote/q03.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q03.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q03.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q03.benchmark # description: Run query 03 from the TPC-H benchmark # group: 
[sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=3 QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1/delta-remote/q04.benchmark b/benchmark/tpch/sf1/local/duckdb/q04.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q04.benchmark rename to benchmark/tpch/sf1/local/duckdb/q04.benchmark index 0109582..351fb75 100644 --- a/benchmark/tpch/sf1/delta-remote/q04.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q04.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q04.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q04.benchmark # description: Run query 04 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=4 QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1/delta-remote/q05.benchmark b/benchmark/tpch/sf1/local/duckdb/q05.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q05.benchmark rename to benchmark/tpch/sf1/local/duckdb/q05.benchmark index 866f526..60d3c0e 100644 --- a/benchmark/tpch/sf1/delta-remote/q05.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q05.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q05.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q05.benchmark # description: Run query 05 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=5 QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1/delta-remote/q06.benchmark b/benchmark/tpch/sf1/local/duckdb/q06.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q06.benchmark rename to benchmark/tpch/sf1/local/duckdb/q06.benchmark index 77be08b..c0f1ada 100644 --- 
a/benchmark/tpch/sf1/delta-remote/q06.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q06.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q06.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q06.benchmark # description: Run query 06 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=6 QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1/delta-remote/q07.benchmark b/benchmark/tpch/sf1/local/duckdb/q07.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q07.benchmark rename to benchmark/tpch/sf1/local/duckdb/q07.benchmark index e354f73..8d7fb7e 100644 --- a/benchmark/tpch/sf1/delta-remote/q07.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q07.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q07.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q07.benchmark # description: Run query 07 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=7 QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1/delta-remote/q08.benchmark b/benchmark/tpch/sf1/local/duckdb/q08.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q08.benchmark rename to benchmark/tpch/sf1/local/duckdb/q08.benchmark index 2c89469..ea880db 100644 --- a/benchmark/tpch/sf1/delta-remote/q08.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q08.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q08.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q08.benchmark # description: Run query 08 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=8 QUERY_NUMBER_PADDED=08 diff --git 
a/benchmark/tpch/sf1/delta-remote/q09.benchmark b/benchmark/tpch/sf1/local/duckdb/q09.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q09.benchmark rename to benchmark/tpch/sf1/local/duckdb/q09.benchmark index 4b5c6dd..d89908b 100644 --- a/benchmark/tpch/sf1/delta-remote/q09.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q09.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q09.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q09.benchmark # description: Run query 09 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=9 QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1/delta-remote/q10.benchmark b/benchmark/tpch/sf1/local/duckdb/q10.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q10.benchmark rename to benchmark/tpch/sf1/local/duckdb/q10.benchmark index 48bcfd3..649f342 100644 --- a/benchmark/tpch/sf1/delta-remote/q10.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q10.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q10.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q10.benchmark # description: Run query 10 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=10 QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1/delta-remote/q11.benchmark b/benchmark/tpch/sf1/local/duckdb/q11.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q11.benchmark rename to benchmark/tpch/sf1/local/duckdb/q11.benchmark index 92520d6..d093170 100644 --- a/benchmark/tpch/sf1/delta-remote/q11.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q11.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q11.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q11.benchmark # description: 
Run query 11 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=11 QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1/delta-remote/q12.benchmark b/benchmark/tpch/sf1/local/duckdb/q12.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q12.benchmark rename to benchmark/tpch/sf1/local/duckdb/q12.benchmark index 8a4b1b4..1da4105 100644 --- a/benchmark/tpch/sf1/delta-remote/q12.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q12.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q12.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q12.benchmark # description: Run query 12 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=12 QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1/delta-remote/q13.benchmark b/benchmark/tpch/sf1/local/duckdb/q13.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q13.benchmark rename to benchmark/tpch/sf1/local/duckdb/q13.benchmark index 2402038..a7d1342 100644 --- a/benchmark/tpch/sf1/delta-remote/q13.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q13.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q13.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q13.benchmark # description: Run query 13 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=13 QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1/delta-remote/q14.benchmark b/benchmark/tpch/sf1/local/duckdb/q14.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q14.benchmark rename to benchmark/tpch/sf1/local/duckdb/q14.benchmark index 
900c2de..e4e57f3 100644 --- a/benchmark/tpch/sf1/delta-remote/q14.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q14.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q14.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q14.benchmark # description: Run query 14 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=14 QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1/delta-remote/q15.benchmark b/benchmark/tpch/sf1/local/duckdb/q15.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q15.benchmark rename to benchmark/tpch/sf1/local/duckdb/q15.benchmark index 0cd2fdb..7991990 100644 --- a/benchmark/tpch/sf1/delta-remote/q15.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q15.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q15.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q15.benchmark # description: Run query 15 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=15 QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1/delta-remote/q16.benchmark b/benchmark/tpch/sf1/local/duckdb/q16.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q16.benchmark rename to benchmark/tpch/sf1/local/duckdb/q16.benchmark index f66de95..54515cb 100644 --- a/benchmark/tpch/sf1/delta-remote/q16.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q16.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q16.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q16.benchmark # description: Run query 16 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=16 
QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1/delta-remote/q17.benchmark b/benchmark/tpch/sf1/local/duckdb/q17.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q17.benchmark rename to benchmark/tpch/sf1/local/duckdb/q17.benchmark index a08bdd9..d8e6ed2 100644 --- a/benchmark/tpch/sf1/delta-remote/q17.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q17.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q17.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q17.benchmark # description: Run query 17 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=17 QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1/delta-remote/q18.benchmark b/benchmark/tpch/sf1/local/duckdb/q18.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q18.benchmark rename to benchmark/tpch/sf1/local/duckdb/q18.benchmark index 56681e3..9325b65 100644 --- a/benchmark/tpch/sf1/delta-remote/q18.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q18.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q18.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q18.benchmark # description: Run query 18 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=18 QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1/delta-remote/q19.benchmark b/benchmark/tpch/sf1/local/duckdb/q19.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q19.benchmark rename to benchmark/tpch/sf1/local/duckdb/q19.benchmark index cc844b9..44885a5 100644 --- a/benchmark/tpch/sf1/delta-remote/q19.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q19.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q19.benchmark +# name: 
benchmark/tpch/sf1/local/duckdb/q19.benchmark # description: Run query 19 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=19 QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1/delta-remote/q20.benchmark b/benchmark/tpch/sf1/local/duckdb/q20.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q20.benchmark rename to benchmark/tpch/sf1/local/duckdb/q20.benchmark index 9564493..ae66625 100644 --- a/benchmark/tpch/sf1/delta-remote/q20.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q20.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q20.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q20.benchmark # description: Run query 20 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=20 QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1/delta-remote/q21.benchmark b/benchmark/tpch/sf1/local/duckdb/q21.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q21.benchmark rename to benchmark/tpch/sf1/local/duckdb/q21.benchmark index c3337f5..759bdd3 100644 --- a/benchmark/tpch/sf1/delta-remote/q21.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q21.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q21.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q21.benchmark # description: Run query 21 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=21 QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1/delta-remote/q22.benchmark b/benchmark/tpch/sf1/local/duckdb/q22.benchmark similarity index 50% rename from benchmark/tpch/sf1/delta-remote/q22.benchmark rename to 
benchmark/tpch/sf1/local/duckdb/q22.benchmark index 7eecc4b..dfd3672 100644 --- a/benchmark/tpch/sf1/delta-remote/q22.benchmark +++ b/benchmark/tpch/sf1/local/duckdb/q22.benchmark @@ -1,7 +1,7 @@ -# name: benchmark/tpch/sf1-delta/q22.benchmark +# name: benchmark/tpch/sf1/local/duckdb/q22.benchmark # description: Run query 22 from the TPC-H benchmark # group: [sf1-parquet] -template benchmark/tpch/sf1/delta-remote/tpch_sf1_delta.benchmark.in +template benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in QUERY_NUMBER=22 QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in new file mode 100644 index 0000000..528150b --- /dev/null +++ b/benchmark/tpch/sf1/local/duckdb/tpch_sf1_delta.benchmark.in @@ -0,0 +1,24 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [sf1] + +name Q${QUERY_NUMBER_PADDED} +group tpch +subgroup sf1 + +require delta + +require parquet + +require tpch + +run +attach './data/generated/tpch_sf1/duckdb.db' as tpch; +use tpch; +pragma tpch(${QUERY_NUMBER}) + +cleanup +use memory; +detach tpch; + +result duckdb/extension/tpch/dbgen/answers/sf1/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git a/benchmark/tpch/sf1/parquet-remote/load.sql b/benchmark/tpch/sf1/parquet-remote/load.sql deleted file mode 100644 index 23c8ed2..0000000 --- a/benchmark/tpch/sf1/parquet-remote/load.sql +++ /dev/null @@ -1,8 +0,0 @@ -create view customer as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/customer/parquet/**/*.parquet'); -create view lineitem as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/lineitem/parquet/**/*.parquet'); -create view nation as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/nation/parquet/**/*.parquet'); -create view orders as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || 
'/orders/parquet/**/*.parquet'); -create view part as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/part/parquet/**/*.parquet'); -create view partsupp as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/partsupp/parquet/**/*.parquet'); -create view region as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/region/parquet/**/*.parquet'); -create view supplier as from parquet_scan('s3://test-bucket-ceiveran/delta_testing/tpch_sf0_01' || '/supplier/parquet/**/*.parquet'); \ No newline at end of file diff --git a/benchmark/tpch/sf1/parquet-remote/q01.benchmark b/benchmark/tpch/sf1/parquet-remote/q01.benchmark deleted file mode 100644 index a9232b7..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q01.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q01.benchmark -# description: Run query 01 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=1 -QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1/parquet-remote/q02.benchmark b/benchmark/tpch/sf1/parquet-remote/q02.benchmark deleted file mode 100644 index c9b6519..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q02.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q02.benchmark -# description: Run query 02 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=2 -QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1/parquet-remote/q03.benchmark b/benchmark/tpch/sf1/parquet-remote/q03.benchmark deleted file mode 100644 index b3838a2..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q03.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q03.benchmark -# description: Run query 03 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in 
-QUERY_NUMBER=3 -QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1/parquet-remote/q04.benchmark b/benchmark/tpch/sf1/parquet-remote/q04.benchmark deleted file mode 100644 index 9d7da66..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q04.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q04.benchmark -# description: Run query 04 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=4 -QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1/parquet-remote/q05.benchmark b/benchmark/tpch/sf1/parquet-remote/q05.benchmark deleted file mode 100644 index 2aae115..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q05.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q05.benchmark -# description: Run query 05 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=5 -QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpch/sf1/parquet-remote/q06.benchmark b/benchmark/tpch/sf1/parquet-remote/q06.benchmark deleted file mode 100644 index 399e407..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q06.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q06.benchmark -# description: Run query 06 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=6 -QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1/parquet-remote/q07.benchmark b/benchmark/tpch/sf1/parquet-remote/q07.benchmark deleted file mode 100644 index 1d741f2..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q07.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q07.benchmark -# description: Run query 07 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=7 -QUERY_NUMBER_PADDED=07 diff --git 
a/benchmark/tpch/sf1/parquet-remote/q08.benchmark b/benchmark/tpch/sf1/parquet-remote/q08.benchmark deleted file mode 100644 index 92df476..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q08.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q08.benchmark -# description: Run query 08 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=8 -QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1/parquet-remote/q09.benchmark b/benchmark/tpch/sf1/parquet-remote/q09.benchmark deleted file mode 100644 index caede6e..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q09.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q09.benchmark -# description: Run query 09 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=9 -QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1/parquet-remote/q10.benchmark b/benchmark/tpch/sf1/parquet-remote/q10.benchmark deleted file mode 100644 index 2f23db9..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q10.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q10.benchmark -# description: Run query 10 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=10 -QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1/parquet-remote/q11.benchmark b/benchmark/tpch/sf1/parquet-remote/q11.benchmark deleted file mode 100644 index 076b15f..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q11.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q11.benchmark -# description: Run query 11 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=11 -QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1/parquet-remote/q12.benchmark 
b/benchmark/tpch/sf1/parquet-remote/q12.benchmark deleted file mode 100644 index 182bbd2..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q12.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q12.benchmark -# description: Run query 12 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=12 -QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1/parquet-remote/q13.benchmark b/benchmark/tpch/sf1/parquet-remote/q13.benchmark deleted file mode 100644 index f316033..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q13.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q13.benchmark -# description: Run query 13 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=13 -QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1/parquet-remote/q14.benchmark b/benchmark/tpch/sf1/parquet-remote/q14.benchmark deleted file mode 100644 index 5cc06ab..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q14.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q14.benchmark -# description: Run query 14 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=14 -QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1/parquet-remote/q15.benchmark b/benchmark/tpch/sf1/parquet-remote/q15.benchmark deleted file mode 100644 index 421ccb6..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q15.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q15.benchmark -# description: Run query 15 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=15 -QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1/parquet-remote/q16.benchmark b/benchmark/tpch/sf1/parquet-remote/q16.benchmark deleted file 
mode 100644 index 315d76b..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q16.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q16.benchmark -# description: Run query 16 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=16 -QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1/parquet-remote/q17.benchmark b/benchmark/tpch/sf1/parquet-remote/q17.benchmark deleted file mode 100644 index 241e412..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q17.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q17.benchmark -# description: Run query 17 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=17 -QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1/parquet-remote/q18.benchmark b/benchmark/tpch/sf1/parquet-remote/q18.benchmark deleted file mode 100644 index 3d2d3d6..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q18.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q18.benchmark -# description: Run query 18 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=18 -QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1/parquet-remote/q19.benchmark b/benchmark/tpch/sf1/parquet-remote/q19.benchmark deleted file mode 100644 index 6c47d3d..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q19.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q19.benchmark -# description: Run query 19 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=19 -QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1/parquet-remote/q20.benchmark b/benchmark/tpch/sf1/parquet-remote/q20.benchmark deleted file mode 100644 index 54b511d..0000000 --- 
a/benchmark/tpch/sf1/parquet-remote/q20.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q20.benchmark -# description: Run query 20 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=20 -QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1/parquet-remote/q21.benchmark b/benchmark/tpch/sf1/parquet-remote/q21.benchmark deleted file mode 100644 index 965a663..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q21.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q21.benchmark -# description: Run query 21 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=21 -QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1/parquet-remote/q22.benchmark b/benchmark/tpch/sf1/parquet-remote/q22.benchmark deleted file mode 100644 index 9f57f1b..0000000 --- a/benchmark/tpch/sf1/parquet-remote/q22.benchmark +++ /dev/null @@ -1,7 +0,0 @@ -# name: benchmark/tpch/sf1-delta/q22.benchmark -# description: Run query 22 from the TPC-H benchmark -# group: [sf1-parquet] - -template benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in -QUERY_NUMBER=22 -QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in deleted file mode 100644 index 73aa48e..0000000 --- a/benchmark/tpch/sf1/parquet-remote/tpch_sf1_delta.benchmark.in +++ /dev/null @@ -1,19 +0,0 @@ -# name: ${FILE_PATH} -# description: ${DESCRIPTION} -# group: [sf1] - -name Q${QUERY_NUMBER_PADDED} -group tpch -subgroup sf1 - -require delta - -require parquet - -require httpfs - -load benchmark/tpch/sf1/parquet-remote/load.sql - -run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql - -result duckdb/extension/tpch/dbgen/answers/sf0.01/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git 
a/scripts/generate_test_data.py b/scripts/generate_test_data.py index 7487681..5df5dc5 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -9,9 +9,8 @@ import math import glob -# BASE_PATH = os.path.dirname(os.path.realpath(__file__)) + "/../data/generated" -BASE_PATH = '/mount/generated' -TMP_PATH = '/mount/tmp' +BASE_PATH = os.path.dirname(os.path.realpath(__file__)) + "/../data/generated" +TMP_PATH = '/tmp' def delete_old_files(): if (os.path.isdir(BASE_PATH)): @@ -46,7 +45,7 @@ def generate_test_data_delta_rs_multi(path, init, tables, splits = 1): file_no = 0 while file_no < splits: os.makedirs(f"{generated_path}/{table['name']}/parquet", exist_ok=True) - # Write DuckDB's reference data + # Write DuckDB's reference data con.sql(f"COPY ({table['query']} where rowid >= {(file_no) * tuples_per_file} and rowid < {(file_no+1) * tuples_per_file}) to '{generated_path}/{table['name']}/parquet/data_{file_no}.parquet' (FORMAT parquet)") file_no += 1 @@ -108,7 +107,7 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate builder = SparkSession.builder.appName("MyApp") \ .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ - .config("spark.driver.memory", "150g") + .config("spark.driver.memory", "8g") spark = configure_spark_with_delta_pip(builder).getOrCreate() @@ -142,128 +141,88 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate # TO CLEAN, uncomment # delete_old_files() -# ### TPCH SF1 in 10 appends -# init = f"call dbgen(sf=0.01);" -# tables = ["customer","lineitem","nation","orders","part","partsupp","region","supplier"] -# queries = [f"from {x}" for x in tables] -# tables = [{'name': x[0], 'query':x[1]} for x in zip(tables,queries)] -# generate_test_data_delta_rs_multi("delta_rs_tpch_sf0_01", init, tables, splits=10) -# - -### TPCDS SF1 -# init = f"call 
dsdgen(sf=1);" -# tables = ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"] -# queries = [f"from {x}" for x in tables] -# tables = [{'name': x[0], 'query':x[1]} for x in zip(tables,queries)] -# generate_test_data_delta_rs_multi("delta_rs_tpcds_sf1", init, tables, splits=1) -# -# ### TPCDS SF1 -# init = f"call dsdgen(sf=1);" -# tables = ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"] -# queries = [f"from {x}" for x in tables] -# tables = [{'name': x[0], 'query':x[1]} for x in zip(tables,queries)] -# generate_test_data_delta_rs_multi("delta_rs_tpcds_sf1", init, tables, splits=1) - -# -# ### Simple partitioned table -# query = "CREATE table test_table AS SELECT i, i%2 as part from range(0,10) tbl(i);" -# generate_test_data_delta_rs("simple_partitioned", query, "part") -# -# ### Lineitem SF0.01 No partitions -# query = "call dbgen(sf=0.01);" -# query += "CREATE table test_table AS SELECT * as part from lineitem;" -# generate_test_data_delta_rs("lineitem_sf0_01", query) -# -# ### Lineitem SF0.01 10 Partitions -# query = "call dbgen(sf=0.01);" -# query += "CREATE table test_table AS SELECT *, l_orderkey%10 as part from lineitem;" -# generate_test_data_delta_rs("lineitem_sf0_01_10part", query, "part") -# -# ### Lineitem SF1 10 Partitions -# query = "call dbgen(sf=1);" -# query += "CREATE table test_table AS SELECT *, l_orderkey%10 as part from lineitem;" -# generate_test_data_delta_rs("lineitem_sf1_10part", query, 
"part") -# -# ## Simple table with a blob as a value -# query = "create table test_table as SELECT encode('ABCDE') as blob, encode('ABCDE') as blob_part, 'ABCDE' as string UNION ALL SELECT encode('😈') as blob, encode('😈') as blob_part, '😈' as string" -# generate_test_data_delta_rs("simple_blob_table", query, "blob_part", add_golden_table=False) -# -# ## Simple partitioned table with structs -# query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);" -# generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part") -# -# ## Partitioned table with all types we can file skip on -# for type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]: -# query = f"CREATE table test_table as select i::{type} as value, i::{type} as part from range(0,2) tbl(i)" -# generate_test_data_delta_rs(f"test_file_skipping/{type}", query, "part") -# -# ## Simple table with deletion vector -# con = duckdb.connect() -# con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'") -# generate_test_data_pyspark('simple_sf1_with_dv', 'simple_sf1_with_dv', f'{TMP_PATH}/simple_sf1_with_dv.parquet', "id % 1000 = 0") -# -# ## Lineitem SF0.01 with deletion vector -# con = duckdb.connect() -# con.query(f"call dbgen(sf=0.01); COPY (from lineitem) TO '{TMP_PATH}/modified_lineitem_sf0_01.parquet'") -# generate_test_data_pyspark('lineitem_sf0_01_with_dv', 'lineitem_sf0_01_with_dv', f'{TMP_PATH}/modified_lineitem_sf0_01.parquet', "l_shipdate = '1994-01-01'") -# -# ## Lineitem SF1 with deletion vector -# con = duckdb.connect() -# con.query(f"call dbgen(sf=1); COPY (from lineitem) TO '{TMP_PATH}/modified_lineitem_sf1.parquet'") -# generate_test_data_pyspark('lineitem_sf1_with_dv', 'lineitem_sf1_with_dv', f'{TMP_PATH}/modified_lineitem_sf1.parquet', "l_shipdate = '1994-01-01'") -# -# ## TPCH SF0.01 full dataset -# con = duckdb.connect() -# 
con.query(f"call dbgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpch_sf0_01_export' (FORMAT parquet)") -# for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: -# generate_test_data_pyspark(f"tpch_sf0_01_{table}", f'tpch_sf0_01/{table}', f'{TMP_PATH}/tpch_sf0_01_export/{table}.parquet') -# -# ## TPCH SF1 full dataset -# con = duckdb.connect() -# con.query(f"call dbgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpch_sf1_export' (FORMAT parquet)") -# for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: -# generate_test_data_pyspark(f"tpch_sf1_{table}", f'tpch_sf1/{table}', f'{TMP_PATH}/tpch_sf1_export/{table}.parquet') -# -# ## TPCDS SF0.01 full dataset -# con = duckdb.connect() -# con.query(f"call dsdgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpcds_sf0_01_export' (FORMAT parquet)") -# for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: -# generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet') +### TPCH SF1 +init = "call dbgen(sf=1);" +tables = ["customer","lineitem","nation","orders","part","partsupp","region","supplier"] +queries = [f"from {x}" for x in tables] +tables = [{'name': x[0], 'query':x[1]} for x in zip(tables,queries)] +generate_test_data_delta_rs_multi("delta_rs_tpch_sf1", init, tables) + +### Simple partitioned table +query = "CREATE table test_table AS SELECT i, i%2 as part from range(0,10) tbl(i);" +generate_test_data_delta_rs("simple_partitioned", query, "part") + +### Lineitem SF0.01 No partitions +query = "call dbgen(sf=0.01);" +query += "CREATE table test_table AS SELECT * as part from lineitem;" 
+generate_test_data_delta_rs("lineitem_sf0_01", query) + +### Lineitem SF0.01 10 Partitions +query = "call dbgen(sf=0.01);" +query += "CREATE table test_table AS SELECT *, l_orderkey%10 as part from lineitem;" +generate_test_data_delta_rs("lineitem_sf0_01_10part", query, "part") + +### Lineitem SF1 10 Partitions +query = "call dbgen(sf=1);" +query += "CREATE table test_table AS SELECT *, l_orderkey%10 as part from lineitem;" +generate_test_data_delta_rs("lineitem_sf1_10part", query, "part") + +## Simple table with a blob as a value +query = "create table test_table as SELECT encode('ABCDE') as blob, encode('ABCDE') as blob_part, 'ABCDE' as string UNION ALL SELECT encode('😈') as blob, encode('😈') as blob_part, '😈' as string" +generate_test_data_delta_rs("simple_blob_table", query, "blob_part", add_golden_table=False) + +## Simple partitioned table with structs +query = "CREATE table test_table AS SELECT {'i':i, 'j':i+1} as value, i%2 as part from range(0,10) tbl(i);" +generate_test_data_delta_rs("simple_partitioned_with_structs", query, "part") + +## Partitioned table with all types we can file skip on +for type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]: + query = f"CREATE table test_table as select i::{type} as value, i::{type} as part from range(0,2) tbl(i)" + generate_test_data_delta_rs(f"test_file_skipping/{type}", query, "part") + +## Simple table with deletion vector +con = duckdb.connect() +con.query(f"COPY (SELECT i as id, ('val' || i::VARCHAR) as value FROM range(0,1000000) tbl(i))TO '{TMP_PATH}/simple_sf1_with_dv.parquet'") +generate_test_data_pyspark('simple_sf1_with_dv', 'simple_sf1_with_dv', f'{TMP_PATH}/simple_sf1_with_dv.parquet', "id % 1000 = 0") -## TPCDS SF1 full dataset +## Lineitem SF0.01 with deletion vector con = duckdb.connect() -con.query(f"call dsdgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpcds_sf1_export' (FORMAT parquet)") -for table in 
["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: - generate_test_data_pyspark(f"tpcds_sf1_{table}", f'tpcds_sf1/{table}', f'{TMP_PATH}/tpcds_sf1_export/{table}.parquet') -con.query(f"attach '{BASE_PATH + '/tpcds_sf1/duckdb.db'}' as duckdb_out") -for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: - con.query(f"create table duckdb_out.{table} as from {table}") - -print("DONE WITH SF1") -input() - -# ## TPCDS SF10 full dataset -# con = duckdb.connect() -# con.query(f"call dsdgen(sf=10); EXPORT DATABASE '{TMP_PATH}/tpcds_sf10_export' (FORMAT parquet)") -# for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: -# generate_test_data_pyspark(f"tpcds_sf10_{table}", f'tpcds_sf10/{table}', f'{TMP_PATH}/tpcds_sf10_export/{table}.parquet') -# con.query(f"attach '{BASE_PATH + '/tpcds_sf10/duckdb.db'}' as duckdb_out") -# for table in 
["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: -# con.query(f"create table duckdb_out.{table} as from {table}") -# -# print("DONE WITH SF10") -# input() - -## TPCDS SF100 full dataset -con = duckdb.connect(f'{TMP_PATH}/tpcds_sf100_duckdb_file') -print("generating SF100") -con.query(f"set memory_limit='140GB';") -con.query(f"call dsdgen(sf=100);") -print("exporting SF100") -con.query(f"EXPORT DATABASE '{TMP_PATH}/tpcds_sf100_export' (FORMAT parquet)") -print("Starting spark to write each table") -for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: - generate_test_data_pyspark(f"tpcds_sf100_{table}", f'tpcds_sf100/{table}', f'{TMP_PATH}/tpcds_sf100_export/{table}.parquet') -con.query(f"attach '{BASE_PATH + '/tpcds_sf100/duckdb.db'}' as duckdb_out") +con.query(f"call dbgen(sf=0.01); COPY (from lineitem) TO '{TMP_PATH}/modified_lineitem_sf0_01.parquet'") +generate_test_data_pyspark('lineitem_sf0_01_with_dv', 'lineitem_sf0_01_with_dv', f'{TMP_PATH}/modified_lineitem_sf0_01.parquet', "l_shipdate = '1994-01-01'") + +## Lineitem SF1 with deletion vector +con = duckdb.connect() +con.query(f"call dbgen(sf=1); COPY (from lineitem) TO '{TMP_PATH}/modified_lineitem_sf1.parquet'") +generate_test_data_pyspark('lineitem_sf1_with_dv', 'lineitem_sf1_with_dv', f'{TMP_PATH}/modified_lineitem_sf1.parquet', "l_shipdate = '1994-01-01'") + +## TPCH SF0.01 full dataset +con = duckdb.connect() +con.query(f"call 
dbgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpch_sf0_01_export' (FORMAT parquet)") +for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: + generate_test_data_pyspark(f"tpch_sf0_01_{table}", f'tpch_sf0_01/{table}', f'{TMP_PATH}/tpch_sf0_01_export/{table}.parquet') + +## TPCDS SF0.01 full dataset +con = duckdb.connect() +con.query(f"call dsdgen(sf=0.01); EXPORT DATABASE '{TMP_PATH}/tpcds_sf0_01_export' (FORMAT parquet)") for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]: - con.query(f"create table duckdb_out.{table} as from {table}") \ No newline at end of file + generate_test_data_pyspark(f"tpcds_sf0_01_{table}", f'tpcds_sf0_01/{table}', f'{TMP_PATH}/tpcds_sf0_01_export/{table}.parquet') + +## TPCH SF1 full dataset +if (not os.path.isdir(BASE_PATH + '/tpch_sf1')): + con = duckdb.connect() + con.query(f"call dbgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpch_sf1_export' (FORMAT parquet)") + for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: + generate_test_data_pyspark(f"tpch_sf1_{table}", f'tpch_sf1/{table}', f'{TMP_PATH}/tpch_sf1_export/{table}.parquet') + con.query(f"attach '{BASE_PATH + '/tpch_sf1/duckdb.db'}' as duckdb_out") + for table in ["customer","lineitem","nation","orders","part","partsupp","region","supplier"]: + con.query(f"create table duckdb_out.{table} as from {table}") + +## TPCDS SF1 full dataset +if (not os.path.isdir(BASE_PATH + '/tpcds_sf1')): + con = duckdb.connect() + con.query(f"call dsdgen(sf=1); EXPORT DATABASE '{TMP_PATH}/tpcds_sf1_export' (FORMAT parquet)") + for table in 
["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]:
+        generate_test_data_pyspark(f"tpcds_sf1_{table}", f'tpcds_sf1/{table}', f'{TMP_PATH}/tpcds_sf1_export/{table}.parquet')
+    con.query(f"attach '{BASE_PATH + '/tpcds_sf1/duckdb.db'}' as duckdb_out")
+    for table in ["call_center","catalog_page","catalog_returns","catalog_sales","customer","customer_demographics","customer_address","date_dim","household_demographics","inventory","income_band","item","promotion","reason","ship_mode","store","store_returns","store_sales","time_dim","warehouse","web_page","web_returns","web_sales","web_site"]:
+        con.query(f"create table duckdb_out.{table} as from {table}")
\ No newline at end of file
diff --git a/scripts/regression_test_runner.py b/scripts/regression_test_runner.py
new file mode 100644
index 0000000..c8d876c
--- /dev/null
+++ b/scripts/regression_test_runner.py
@@ -0,0 +1,280 @@
+# FIXME, this script is copied from github.com/duckdb/duckdb/scripts/regression_test_runner.py,
+# only the root-dir param was added here, so this script can be removed once that is available upstream
+"""Run a list of benchmarks with two benchmark runners and report performance regressions.
+
+Usage:
+    python3 scripts/regression_test_runner.py --old=/old/benchmark_runner
+        --new=/new/benchmark_runner --benchmarks=/benchmark/list.csv
+
+Optional arguments: --verbose, --threads=N, --nofail, --disable-timeout and --root-dir=DIR.
+The exit code is 1 when a regression is detected or a benchmark fails to run, 0 otherwise.
+"""
+
+import os
+import sys
+import subprocess
+from io import StringIO
+import csv
+import statistics
+import math
+import functools
+import shutil
+
+# make every print call flush its output immediately
+print = functools.partial(print, flush=True)
+
+
+def is_number(s):
+    """Return True when `s` can be converted to a float."""
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def geomean(xs):
+    """Return the geometric mean of the numbers (or numeric strings) in `xs`.
+
+    Returns the string 'EMPTY' for an empty list, and the first non-numeric entry
+    when there is one, so callers have to check for a str result.
+    """
+    if not xs:
+        return 'EMPTY'
+    for entry in xs:
+        if not is_number(entry):
+            return entry
+    return math.exp(math.fsum(math.log(float(x)) for x in xs) / len(xs))
+
+
+# how many times we will run the experiment, to be sure of the regression
+number_repetitions = 5
+# the threshold at which we consider something a regression (as a fraction: 0.1 is 10%)
+regression_threshold_percentage = 0.1
+# minimal seconds diff for something to be a regression (for very fast benchmarks)
+regression_threshold_seconds = 0.05
+
+old_runner = None
+new_runner = None
+benchmark_file = None
+verbose = False
+threads = None
+no_regression_fail = False
+disable_timeout = False
+max_timeout = 3600
+root_dir = ""
+for arg in sys.argv:
+    if arg.startswith("--old="):
+        old_runner = arg.split("=", 1)[1]
+    elif arg.startswith("--new="):
+        new_runner = arg.split("=", 1)[1]
+    elif arg.startswith("--benchmarks="):
+        benchmark_file = arg.split("=", 1)[1]
+    elif arg == "--verbose":
+        verbose = True
+    elif arg.startswith("--threads="):
+        threads = int(arg.split("=", 1)[1])
+    elif arg.startswith("--nofail"):
+        no_regression_fail = True
+    elif arg == "--disable-timeout":
+        disable_timeout = True
+    elif arg.startswith("--root-dir="):
+        root_dir = arg.split("=", 1)[1]
+
+if old_runner is None or new_runner is None or benchmark_file is None:
+    print(
+        "Expected usage: python3 scripts/regression_test_runner.py --old=/old/benchmark_runner --new=/new/benchmark_runner --benchmarks=/benchmark/list.csv"
+    )
+    sys.exit(1)
+
+if not os.path.isfile(old_runner):
+    print(f"Failed to find old runner {old_runner}")
+    sys.exit(1)
+
+if not os.path.isfile(new_runner):
+    print(f"Failed to find new runner {new_runner}")
+    sys.exit(1)
+
+# every raw timing reported by each runner, used for the geometric means printed at the end
+complete_timings = {old_runner: [], new_runner: []}
+
+
+def run_benchmark(runner, benchmark):
+    """Run `benchmark` with `runner` and return the median of the reported timings.
+
+    Returns a float on success. When the runner times out, exits with a non-zero
+    code, or prints timings that cannot be parsed, a 'Failed to run benchmark ...'
+    string is returned instead. Every raw timing is also appended to
+    complete_timings[runner].
+    """
+    benchmark_args = [runner, benchmark]
+
+    if root_dir:
+        benchmark_args += ["--root-dir", root_dir]
+
+    if threads is not None:
+        benchmark_args += ["--threads=%d" % (threads,)]
+    if disable_timeout:
+        benchmark_args += ["--disable-timeout"]
+        timeout_seconds = max_timeout
+    else:
+        timeout_seconds = 600
+
+    try:
+        proc = subprocess.run(benchmark_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout_seconds)
+        out = proc.stdout.decode('utf8')
+        err = proc.stderr.decode('utf8')
+        returncode = proc.returncode
+    except subprocess.TimeoutExpired:
+        print("Failed to run benchmark " + benchmark)
+        print(f"Aborted due to exceeding the limit of {timeout_seconds} seconds")
+        return 'Failed to run benchmark ' + benchmark
+    if returncode != 0:
+        print("Failed to run benchmark " + benchmark)
+        print(
+            '''====================================================
+============== STDERR =============
+====================================================
+'''
+        )
+        print(err)
+        print(
+            '''====================================================
+============== STDOUT =============
+====================================================
+'''
+        )
+        print(out)
+        if 'HTTP' in err:
+            print("Ignoring HTTP error and terminating the running of the regression tests")
+            sys.exit(0)
+        return 'Failed to run benchmark ' + benchmark
+    if verbose:
+        print(err)
+    # read the input CSV
+    f = StringIO(err)
+    csv_reader = csv.reader(f, delimiter='\t')
+    header = True
+    timings = []
+    try:
+        for row in csv_reader:
+            if len(row) == 0:
+                continue
+            if header:
+                header = False
+            else:
+                timings.append(row[2])
+                complete_timings[runner].append(row[2])
+        # convert to float before taking the median: the raw strings would be ordered
+        # lexicographically ("10.0" < "9.0") and an even number of them cannot be averaged
+        return statistics.median([float(timing) for timing in timings])
+    except Exception:
+        print("Failed to run benchmark " + benchmark)
+        print(err)
+        return 'Failed to run benchmark ' + benchmark
+
+
+def run_benchmarks(runner, benchmark_list):
+    """Run every benchmark in `benchmark_list` with `runner`; return {benchmark: result}."""
+    results = {}
+    for benchmark in benchmark_list:
+        results[benchmark] = run_benchmark(runner, benchmark)
+    return results
+
+
+# read the initial benchmark list, skipping blank lines
+with open(benchmark_file, 'r') as f:
+    benchmark_list = [line.strip() for line in f if line.strip()]
+
+multiply_percentage = 1.0 + regression_threshold_percentage
+other_results = []
+error_list = []
+regression_list = []
+for i in range(number_repetitions):
+    regression_list = []
+    if not benchmark_list:
+        break
+    print(
+        f'''====================================================
+============== ITERATION {i} =============
+============== REMAINING {len(benchmark_list)} =============
+====================================================
+'''
+    )
+
+    old_results = run_benchmarks(old_runner, benchmark_list)
+    new_results = run_benchmarks(new_runner, benchmark_list)
+
+    for benchmark in benchmark_list:
+        old_res = old_results[benchmark]
+        new_res = new_results[benchmark]
+        if isinstance(old_res, str) or isinstance(new_res, str):
+            # benchmark failed to run - always a regression
+            error_list.append([benchmark, old_res, new_res])
+        elif not no_regression_fail and (
+            (old_res + regression_threshold_seconds) * multiply_percentage < new_res
+        ):
+            regression_list.append([benchmark, old_res, new_res])
+        else:
+            other_results.append([benchmark, old_res, new_res])
+    # only benchmarks that regressed are run again in the next iteration
+    benchmark_list = [x[0] for x in regression_list]
+
+exit_code = 0
+regression_list += error_list
+if len(regression_list) > 0:
+    exit_code = 1
+    print(
+        '''====================================================
+============== REGRESSIONS DETECTED =============
+====================================================
+'''
+    )
+    for regression in regression_list:
+        print(f"{regression[0]}")
+        print(f"Old timing: {regression[1]}")
+        print(f"New timing: {regression[2]}")
+        print("")
+    print(
+        '''====================================================
+============== OTHER TIMINGS =============
+====================================================
+'''
+    )
+else:
+    print(
+        '''====================================================
+============== NO REGRESSIONS DETECTED =============
+====================================================
+'''
+    )
+
+other_results.sort()
+for res in other_results:
+    print(f"{res[0]}")
+    print(f"Old timing: {res[1]}")
+    print(f"New timing: {res[2]}")
+    print("")
+
+time_a = geomean(complete_timings[old_runner])
+time_b = geomean(complete_timings[new_runner])
+
+
+print("")
+if isinstance(time_a, str) or isinstance(time_b, str):
+    print(f"Old: {time_a}")
+    print(f"New: {time_b}")
+elif time_a > time_b * 1.01:
+    print(f"Old timing geometric mean: {time_a}")
+    print(f"New timing geometric mean: {time_b}, roughly {int((time_a - time_b) * 100.0 / time_a)}% faster")
+elif time_b > time_a * 1.01:
+    print(f"Old timing geometric mean: {time_a}, roughly {int((time_b - time_a) * 100.0 / time_b)}% faster")
+    print(f"New timing geometric mean: {time_b}")
+else:
+    print(f"Old timing geometric mean: {time_a}")
+    print(f"New timing geometric mean: {time_b}")
+
+# nuke cached benchmark data between runs
+if os.path.isdir("duckdb_benchmark_data"):
+    shutil.rmtree('duckdb_benchmark_data')
+sys.exit(exit_code)