Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) #8368

Merged
merged 2 commits into from
Dec 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,14 @@ class CHListenerApi extends ListenerApi with Logging {
"local_engine.settings.log_processors_profiles" -> "true")
conf.setCHSettings("spark_version", SPARK_VERSION)
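// If no external-sort memory limit is configured (value < 0) and off-heap memory is enabled,
// derive one from the off-heap size: 80% of (spark.memory.offHeap.size / spark.executor.cores).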
val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
if (conf.getLong(externalSortKey, -1) < 0) {
if (conf.getLong(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, -1) < 0) {
if (conf.getBoolean("spark.memory.offHeap.enabled", defaultValue = false)) {
val memSize = JavaUtils.byteStringAsBytes(conf.get("spark.memory.offHeap.size"))
if (memSize > 0L) {
val cores = conf.getInt("spark.executor.cores", 1).toLong
val sortMemLimit = ((memSize / cores) * 0.8).toLong
logDebug(s"max memory for sorting: $sortMemLimit")
conf.set(externalSortKey, sortMemLimit.toString)
conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, sortMemLimit.toString)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ object RuntimeSettings {
.doc("https://clickhouse.com/docs/en/operations/settings/settings#min_insert_block_size_rows")
.longConf
.createWithDefault(1048449)

val MAX_BYTES_BEFORE_EXTERNAL_SORT =
buildConf(runtimeSettings("max_bytes_before_external_sort"))
.doc("https://clickhouse.com/docs/en/operations/settings/query-complexity#settings-max_bytes_before_external_sort")
.longConf
.createWithDefault(0)
// scalastyle:on line.size.limit

/** Gluten Configuration */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.gluten.execution.mergetree

import org.apache.gluten.GlutenConfig
import org.apache.gluten.backendsapi.clickhouse.CHConf
import org.apache.gluten.backendsapi.clickhouse.{CHConf, RuntimeSettings}
import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite

import org.apache.spark.SparkConf
Expand Down Expand Up @@ -53,7 +53,7 @@ class GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite

test("GLUTEN-6470: Fix Task not serializable error when inserting mergetree data") {

val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
val externalSortKey = RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key
assertResult(3435973836L)(spark.conf.get(externalSortKey).toLong)

spark.sql(s"""
Expand Down
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20241224
CH_COMMIT=b38537577c5
CH_BRANCH=rebase_ch/20241228
CH_COMMIT=bf8e58b57e9
5 changes: 5 additions & 0 deletions cpp-ch/local-engine/Common/CHUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ namespace Setting
{
extern const SettingsUInt64 prefer_external_sort_block_bytes;
extern const SettingsUInt64 max_bytes_before_external_sort;
extern const SettingsDouble max_bytes_ratio_before_external_sort;
extern const SettingsBool query_plan_merge_filters;
extern const SettingsBool compile_expressions;
extern const SettingsShortCircuitFunctionEvaluation short_circuit_function_evaluation;
Expand Down Expand Up @@ -644,6 +645,10 @@ void BackendInitializerUtil::initSettings(const SparkConfigs::ConfigMap & spark_
settings[Setting::short_circuit_function_evaluation] = ShortCircuitFunctionEvaluation::DISABLE;
///

// Needed after https://github.com/ClickHouse/ClickHouse/pull/73422:
// max_bytes_before_external_sort is already set explicitly, so set
// max_bytes_ratio_before_external_sort to 0.
settings[Setting::max_bytes_ratio_before_external_sort] = 0.;

for (const auto & [key, value] : spark_conf_map)
{
// Firstly apply spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.* to settings
Expand Down
Loading