From dab5e4310fa7fad5eaeff2245caa5be98c9ada09 Mon Sep 17 00:00:00 2001 From: milo Date: Fri, 13 Dec 2024 13:11:48 +0000 Subject: [PATCH 1/4] fix: handling HIP local memory error in core and compiler --- kernel_tuner/backends/compiler.py | 25 +++++++++++++++++++++---- kernel_tuner/core.py | 7 +++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/kernel_tuner/backends/compiler.py b/kernel_tuner/backends/compiler.py index b5724a1a0..72de2cbaf 100644 --- a/kernel_tuner/backends/compiler.py +++ b/kernel_tuner/backends/compiler.py @@ -265,12 +265,23 @@ def compile(self, kernel_instance): if platform.system() == "Darwin": lib_extension = ".dylib" - subprocess.check_call([self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"]) - subprocess.check_call( + subprocess.run( + [self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True + ) + + subprocess.run( [self.compiler, filename + ".o"] + compiler_options + ["-shared", "-o", filename + lib_extension] - + lib_args + + lib_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True ) self.lib = np.ctypeslib.load_library(filename, ".") @@ -396,10 +407,16 @@ def memcpy_htod(self, dest, src): def cleanup_lib(self): """unload the previously loaded shared library""" + if self.lib is None: + return + if not self.using_openmp and not self.using_openacc: # this if statement is necessary because shared libraries that use # OpenMP will core dump when unloaded, this is a well-known issue with OpenMP logging.debug("unloading shared library") - _ctypes.dlclose(self.lib._handle) + try: + _ctypes.dlclose(self.lib._handle) + finally: + self.lib = None units = {} diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index abd4a017e..66a51981b 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -647,8 +647,11 @@ def compile_kernel(self, instance, verbose): 
shared_mem_error_messages = [ "uses too much shared data", "local memory limit exceeded", + r"local memory \(\d+\) exceeds limit \(\d+\)", ] - if any(msg in str(e) for msg in shared_mem_error_messages): + error_message = str(e.stderr) if hasattr(e, "stderr") else str(e) + if any(re.search(msg, error_message) for msg in shared_mem_error_messages): + print("DEBUG: SHARED MEM ERROR") logging.debug( "compile_kernel failed due to kernel using too much shared memory" ) @@ -715,7 +718,7 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose) ) # check for templated kernel - if kernel_source.lang in ["CUDA", "NVCUDA"] and "<" in name and ">" in name: + if kernel_source.lang in ["CUDA", "NVCUDA", "HIP"] and "<" in name and ">" in name: kernel_string, name = wrap_templated_kernel(kernel_string, name) # Preprocess GPU arguments. Require for handling `Tunable` arguments From 0227dc9337be2a78e02c19133da537a490628adf Mon Sep 17 00:00:00 2001 From: milo Date: Fri, 13 Dec 2024 14:10:00 +0000 Subject: [PATCH 2/4] fix(test_compiler_functions): updated test_compile_detects_device_code with new compiler changes --- test/test_compiler_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_compiler_functions.py b/test/test_compiler_functions.py index 0c9d7f86a..913fee85d 100644 --- a/test/test_compiler_functions.py +++ b/test/test_compiler_functions.py @@ -198,11 +198,11 @@ def test_compile_detects_device_code(npct, subprocess): cfunc = CompilerFunctions() cfunc.compile(kernel_instance) - print(subprocess.check_call.call_args_list) + print(subprocess.run.call_args_list) # assert the filename suffix used for source compilation is .cu dot_cu_used = False - for call in subprocess.check_call.call_args_list: + for call in subprocess.run.call_args_list: args, kwargs = call args = args[0] print(args) From 1227151f47f7628987d6ab3f553253363d6553e6 Mon Sep 17 00:00:00 2001 From: milo Date: Fri, 13 Dec 2024 14:22:23 +0000 Subject: 
[PATCH 3/4] clean: deleted debug prints --- kernel_tuner/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 66a51981b..4323c411c 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -651,7 +651,6 @@ def compile_kernel(self, instance, verbose): ] error_message = str(e.stderr) if hasattr(e, "stderr") else str(e) if any(re.search(msg, error_message) for msg in shared_mem_error_messages): - print("DEBUG: SHARED MEM ERROR") logging.debug( "compile_kernel failed due to kernel using too much shared memory" ) From cbdd0a8e0c94048ddef2026c48a83cc6cd3748b5 Mon Sep 17 00:00:00 2001 From: Ben van Werkhoven Date: Fri, 13 Dec 2024 16:17:34 +0100 Subject: [PATCH 4/4] remove whitespace --- kernel_tuner/backends/compiler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel_tuner/backends/compiler.py b/kernel_tuner/backends/compiler.py index 72de2cbaf..154f501ba 100644 --- a/kernel_tuner/backends/compiler.py +++ b/kernel_tuner/backends/compiler.py @@ -268,19 +268,19 @@ def compile(self, kernel_instance): subprocess.run( [self.compiler, "-c", source_file] + compiler_options + ["-o", filename + ".o"], stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - check=True + stderr=subprocess.PIPE, + text=True, + check=True ) - + subprocess.run( [self.compiler, filename + ".o"] + compiler_options + ["-shared", "-o", filename + lib_extension] + lib_args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, check=True )