Skip to content

Commit

Permalink
Merge pull request #2749 from finos/arrow-17
Browse files Browse the repository at this point in the history
Upgrade Apache Arrow C++ to 17.0.0
  • Loading branch information
texodus authored Sep 15, 2024
2 parents fc554a9 + aa83996 commit 47f6cbe
Show file tree
Hide file tree
Showing 20 changed files with 75 additions and 5,604 deletions.
3 changes: 2 additions & 1 deletion cmake/arrow.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ project(arrow-download NONE)
include(ExternalProject)
ExternalProject_Add(apachearrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-12.0.0
GIT_TAG apache-arrow-17.0.0
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build"
SOURCE_SUBDIR "cpp"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
Expand Down
308 changes: 0 additions & 308 deletions cmake/arrow/CMakeLists.txt

This file was deleted.

53 changes: 0 additions & 53 deletions cmake/arrow/config.h

This file was deleted.

2 changes: 1 addition & 1 deletion cmake/flatbuffers.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ project(flatbuffers-download NONE)
include(ExternalProject)
ExternalProject_Add(flatbuffers
GIT_REPOSITORY https://github.com/google/flatbuffers.git
GIT_TAG v2.0.5
GIT_TAG v23.5.26
SOURCE_DIR "${CMAKE_BINARY_DIR}/flatbuffers-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/flatbuffers-build"
CONFIGURE_COMMAND ""
Expand Down
64 changes: 0 additions & 64 deletions cmake/modules/FindFlatbuffers.cmake

This file was deleted.

25 changes: 20 additions & 5 deletions cmake/modules/FindInstallDependency.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,27 @@ function(psp_build_dep name cmake_file)
endif()

if(${name} STREQUAL arrow)
# Overwrite arrow's CMakeLists with our custom, minimal CMakeLists.
configure_file(${PSP_CMAKE_MODULE_PATH}/${name}/CMakeLists.txt ${CMAKE_BINARY_DIR}/${name}-src/cpp/ COPYONLY)
configure_file(${PSP_CMAKE_MODULE_PATH}/${name}/config.h ${CMAKE_BINARY_DIR}/${name}-src/cpp/src/arrow/util/ COPYONLY)
set(ARROW_SIMD_LEVEL "NONE")
set(ARROW_DEFINE_OPTIONS ON)
set(ARROW_RUNTIME_SIMD_LEVEL "NONE")
set(ARROW_BUILD_SHARED OFF)
set(ARROW_BUILD_STATIC ON)
set(ARROW_BUILD_INTEGRATION OFF)
set(ARROW_JEMALLOC OFF)
set(ARROW_CSV ON)
set(ARROW_LZ4 ON)
set(ARROW_WITH_ZSTD ON)
set(ARROW_WITH_LZ4 ON)
set(ARROW_ENABLE_THREADING OFF)
set(ARROW_NO_EXPORT ON)
# Native (non-WASM) Windows builds ask Arrow to use its bundled third-party
# dependencies by setting ARROW_DEPENDENCY_SOURCE to "BUNDLED".
# The WASM guard is spelled PSP_WASM_BUILD, matching the other WASM checks in
# this project's CMake files; the previous spelling `PSP_BUILD_WASM` is
# presumably never defined, which made the `NOT` clause always true.
# NOTE(review): confirm no caller sets `PSP_BUILD_WASM` on purpose.
if(WIN32 AND NOT PSP_WASM_BUILD)
    set(ARROW_DEPENDENCY_SOURCE "BUNDLED")
endif()
include_directories(SYSTEM ${CMAKE_BINARY_DIR}/${name}-build/src)
add_subdirectory(${CMAKE_BINARY_DIR}/${name}-src/cpp/
${CMAKE_BINARY_DIR}/${name}-build
EXCLUDE_FROM_ALL)
EXCLUDE_FROM_ALL
)

include_directories(SYSTEM ${CMAKE_BINARY_DIR}/${name}-src/cpp/src/)
elseif(${name} STREQUAL exprtk)
Expand Down Expand Up @@ -78,7 +93,7 @@ function(psp_build_dep name cmake_file)
endif()

if(NOT PSP_WASM_BUILD AND (MACOS OR NOT MANYLINUX))
if(${name} STREQUAL arrow OR ${name} STREQUAL flatbuffers OR ${name} STREQUAL double-conversion OR ${name} STREQUAL re2)
if(${name} STREQUAL arrow_static OR ${name} STREQUAL flatbuffers OR ${name} STREQUAL double-conversion OR ${name} STREQUAL re2)
target_compile_options(${name} PRIVATE -fvisibility=hidden)
endif()
endif()
Expand Down
77 changes: 34 additions & 43 deletions cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
-fexceptions \
-g3 \
")
if (PSP_WASM_EXCEPTIONS)
set(OPT_FLAGS "${OPT_FLAGS} -fwasm-exceptions ")
endif()
if (PSP_WASM_EXCEPTIONS)
set(OPT_FLAGS "${OPT_FLAGS} -fwasm-exceptions ")
endif()
endif ()
else()
set(OPT_FLAGS " \
Expand All @@ -299,22 +299,11 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
if(PSP_PYODIDE)
set(CMAKE_FIND_ROOT_PATH "${CMAKE_FIND_ROOT_PATH};/usr/local/")
find_package(Boost REQUIRED)
else()
elseif(NOT WIN32)
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost REQUIRED COMPONENTS system)
endif()

if(NOT Boost_FOUND)
message(FATAL_ERROR "${Red}Boost could not be located${ColorReset}")
else()
psp_build_message("${Cyan}Found Boost: `Boost_INCLUDE_DIRS`: ${Boost_INCLUDE_DIRS}, `Boost_LIBRARY_DIRS` - ${Boost_LIBRARY_DIRS} ${ColorReset}")

if(WIN32)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
add_definitions(-DBOOST_UUID_FORCE_AUTO_LINK)
endif()
endif()

if(WIN32)
foreach(warning 4244 4251 4267 4275 4290 4786 4305 4996)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd${warning}")
Expand All @@ -329,21 +318,8 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
endif()

if(PSP_PYTHON_BUILD)
# ########################
# PYTHON BINDINGS BUILD #
# ########################
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
include_directories("${PSP_PYTHON_SRC}/perspective/include")

if(MANYLINUX)
# Manylinux docker images have no shared libraries
# They instead use a statically built python.
# Cmake's default FindPython can't find the python headers
# without also finding (or failing to find) the python libraries
# so we use a custom FindPythonHeaders that is the same as the
# default, but ignores when the python libraries can't be found.
psp_build_message("${Red}Manylinux build has no python shared libraries${ColorReset}")
endif()
endif()
endif()

Expand All @@ -353,10 +329,18 @@ set(RAPIDJSON_BUILD_TESTS OFF CACHE BOOL "Disable rapidjson tests")
set(CMAKE_C_FLAGS " \
-O3 \
")
# Choose the C++ compiler flags based on PSP_WASM_EXCEPTIONS:
#   - enabled:  -fwasm-exceptions, -O3 and -g0
#   - disabled: -O3 only
# Both branches assign CMAKE_CXX_FLAGS outright, so any value it held before
# this point is replaced rather than appended to.
if (PSP_WASM_EXCEPTIONS)
set(CMAKE_CXX_FLAGS " -fwasm-exceptions \
-O3 \
-g0 \
")
else()
set(CMAKE_CXX_FLAGS " \
-O3 \
")

endif()

if(PSP_PYODIDE)
set(RELOCATABLE_FLAGS "-sRELOCATABLE=1 -sSIDE_MODULE=2 -sWASM_BIGINT=1")

Expand Down Expand Up @@ -399,6 +383,24 @@ psp_build_dep("lz4" "${PSP_CMAKE_MODULE_PATH}/lz4.txt.in")
# Build minimal arrow itself
psp_build_dep("arrow" "${PSP_CMAKE_MODULE_PATH}/arrow.txt.in")

if(PSP_PYTHON_BUILD AND NOT PSP_PYODIDE)
    # Boost has to be located after arrow has been added; doing it earlier
    # triggers a linking bug on Windows.
    if(WIN32)
        set(Boost_USE_STATIC_LIBS ON)
        find_package(Boost REQUIRED COMPONENTS system)
    endif()

    # On non-Windows platforms Boost_FOUND is expected to come from an earlier
    # find_package(Boost ...) call -- NOTE(review): confirm against the caller.
    # FATAL_ERROR halts processing, so nothing below runs when Boost is missing.
    if(NOT Boost_FOUND)
        message(FATAL_ERROR "${Red}Boost could not be located${ColorReset}")
    endif()

    psp_build_message("${Cyan}Found Boost: `Boost_INCLUDE_DIRS`: ${Boost_INCLUDE_DIRS}, `Boost_LIBRARY_DIRS` - ${Boost_LIBRARY_DIRS} ${ColorReset}")

    # Windows additionally needs the Boost headers on the include path and the
    # BOOST_UUID_FORCE_AUTO_LINK definition.
    if(WIN32)
        include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
        add_definitions(-DBOOST_UUID_FORCE_AUTO_LINK)
    endif()
endif()


# Build re2 as our regex library
# this is a workaround for some re2-specific weirdness
add_definitions(-DTARGET_OS_OSX=1)
Expand Down Expand Up @@ -587,7 +589,7 @@ if(PSP_WASM_BUILD AND NOT PSP_PYTHON_BUILD)
add_library(psp ${WASM_SOURCE_FILES})
target_compile_definitions(psp PRIVATE PSP_ENABLE_WASM=1)
set_target_properties(psp PROPERTIES COMPILE_FLAGS "")
target_link_libraries(psp PRIVATE arrow re2 protos)
target_link_libraries(psp PRIVATE arrow_static re2 protos)

add_executable(perspective_esm src/cpp/binding_api.cpp)
target_link_libraries(perspective_esm psp protos)
Expand Down Expand Up @@ -628,7 +630,7 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
target_compile_definitions(psp PRIVATE PSP_ENABLE_PYTHON=1 PSP_ENABLE_WASM=1)
# support for emscripten exceptions https://emscripten.org/docs/porting/exceptions.html#emscripten-javascript-based-exception-support
target_compile_options(psp PUBLIC -fexceptions -fvisibility=hidden)
target_compile_options(arrow PUBLIC -fexceptions -fvisibility=hidden)
target_compile_options(arrow_static PUBLIC -fexceptions -fvisibility=hidden)
target_compile_options(re2 PUBLIC -fexceptions -fvisibility=hidden)
target_compile_options(protos PUBLIC -fexceptions -fvisibility=hidden)
else()
Expand All @@ -647,25 +649,14 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
# intentionally blank
else()
target_compile_options(psp PRIVATE -fvisibility=hidden)
# target_compile_options(psppy PRIVATE -Wdeprecated-declarations)
endif()

# Link against minimal arrow static library
target_link_libraries(psp PRIVATE arrow re2 protos)
# target_link_libraries(psppy psp)

# The compiled libraries will be put in CMAKE_LIBRARY_OUTPUT_DIRECTORY by default. In the
# setup.py file, we designate this to be in the build/lib.<platform> directory. However,
# since we want to be able to test perspective in-source, we also copy the libraries into
# the source folder. These two commands do that.
# add_custom_command(TARGET psp POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:psp> ${PSP_PYTHON_SRC}/table/)
# add_custom_command(TARGET psppy POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:psppy> ${PSP_PYTHON_SRC}/table/)

# #######################
target_link_libraries(psp PRIVATE arrow_static re2 protos lz4_static libzstd_static)
else()
add_library(psp STATIC ${WASM_SOURCE_FILES})
target_compile_options(psp PRIVATE -fvisibility=hidden)
target_link_libraries(psp PRIVATE arrow re2 protos)
target_link_libraries(psp PRIVATE arrow_static re2 protos)
endif()

if(PSP_CPP_BUILD_STRICT AND NOT WIN32)
Expand Down
7 changes: 0 additions & 7 deletions cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,7 @@
#include <perspective/arrow_csv.h>
#include <arrow/util/value_parsing.h>
#include <arrow/io/memory.h>

#ifdef PSP_ENABLE_WASM
// This causes build warnings
// https://github.com/emscripten-core/emscripten/issues/8574
#include <perspective/vendor/arrow_single_threaded_reader.h>
#else
#include <arrow/csv/reader.h>
#endif

template <class TimePoint>
static inline arrow::TimestampType::c_type
Expand Down
13 changes: 0 additions & 13 deletions cpp/perspective/src/cpp/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "perspective/data_table.h"
#include "perspective/raw_types.h"
#include "perspective/schema.h"
// #include "arrow/vendored/datetime/date.h"
#include "rapidjson/document.h"
#include <chrono>
#include <ctime>
Expand Down Expand Up @@ -448,18 +447,6 @@ rapidjson_type_to_dtype(const rapidjson::Value& value) {
std::chrono::system_clock::time_point tp;

if (parse_all_date_time(tm, tp, str)) {
LOG_DEBUG(
"Parsed date: " << tm.tm_year + 1900 << "-" << tm.tm_mon + 1
<< "-" << tm.tm_mday << " " << tm.tm_hour
<< ":" << tm.tm_min << ":" << tm.tm_sec
);
auto tpm =
std::chrono::duration_cast<std::chrono::milliseconds>(
tp.time_since_epoch()
)
.count();
LOG_DEBUG("TP: " << tpm << '\n');

if (tm.tm_hour == 0 && tm.tm_min == 0 && tm.tm_sec == 0) {
return t_dtype::DTYPE_DATE;
}
Expand Down
Loading

0 comments on commit 47f6cbe

Please sign in to comment.