Launch kernel

gevtushenko · Jan 19, 2020 · 8f21d43
1 parent f37beda
commit 8f21d43
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 5 deletions.
diff --git a/example.cpp b/example.cpp
@@ -18,12 +18,48 @@ int main ()
           // Test comment
           out[tid] = a * x[tid] + y[tid];
         }
-    }, (float, a), (const float *, x), (const float *, y), (float *, out));
+    }, (int, a), (const int *, x), (const int *, y), (int *, out));
+
+  size_t n = 1024;
 
   nlohmann::json data;
-  data["N"] = 1024;
+  data["N"] = n;
+
+  int a = 2;
+  int *x {};
+  int *y {};
+  int *out {};
+
+  cudaMalloc (&x, n * sizeof (int));
+  cudaMalloc (&y, n * sizeof (int));
+  cudaMalloc (&out, n * sizeof (int));
+
+  std::unique_ptr<int[]> h_x (new int[n]);
+  std::unique_ptr<int[]> h_y (new int[n]);
+  std::unique_ptr<int[]> h_out (new int[n]);
+
+  for (size_t i = 0; i < n; i++)
+    {
+      h_x[i] = 1;
+      h_y[i] = 2;
+    }
+
+  cudaMemcpy (x, h_x.get (), n * sizeof (float), cudaMemcpyHostToDevice);
+  cudaMemcpy (y, h_y.get (), n * sizeof (float), cudaMemcpyHostToDevice);
 
   auto saxpy_kernel = saxpy.compile (data);
+  saxpy_kernel.launch (1, 1024, a, x, y, out);
+
+  cudaMemcpy (h_out.get (), out, n * sizeof (float), cudaMemcpyDeviceToHost);
+
+  for (size_t i = 0; i < n; i++)
+    {
+      int target_value = a * h_x[i] + h_y[i];
+      if (target_value != h_out[i])
+        std::cerr << "Error in out[" << i << "] = " << h_out[i] << " != " << target_value << "\n";
+    }
 
-  cudaDeviceSynchronize ();
+  cudaFree (x);
+  cudaFree (y);
+  cudaFree (out);
 }
diff --git a/include/cuda_jit.h b/include/cuda_jit.h
@@ -28,7 +28,7 @@ class kernel_base
   ~kernel_base ();
 
 protected:
-  void launch_base (dim3 grid_size, dim3 block_size);
+  void launch_base (dim3 grid_size, dim3 block_size, void **params);
 
 private:
   std::unique_ptr<char[]> ptx;
@@ -38,6 +38,8 @@ class kernel_base
 template<typename... args_types>
 class kernel : public kernel_base
 {
+  std::vector<void*> params;
+
 public:
   kernel (kernel &&) = default;
   explicit kernel (const std::string &kernel_name, std::unique_ptr<char[]> ptx_arg)
@@ -46,7 +48,8 @@ class kernel : public kernel_base
 
   void launch (dim3 grid_size, dim3 block_size, args_types... args) // Launches the kernel: packs the arguments into a pointer array and forwards to kernel_base::launch_base.
   {
-
+    params = { &args... }; // One pointer per argument; these point at this call's by-value copies, so the stored addresses are stale once launch returns.
+    kernel_base::launch_base (grid_size, block_size, params.data ()); // Passes the grid/block sizes and the void* array on to the base class.
   }
 };
 

diff --git a/src/cuda_jit.cpp b/src/cuda_jit.cpp
@@ -90,4 +90,9 @@ std::unique_ptr<char[]> cuda_jit_base::compile_base (const nlohmann::json &json)
   return std::move (ptx);
 }
 
+void kernel_base::launch_base (dim3 grid_size, dim3 block_size, void **params) // Launches impl->kernel_fn with the given grid/block sizes; params is the array of argument pointers.
+{
+  throw_on_error (cuLaunchKernel (impl->kernel_fn, grid_size.x, grid_size.y, grid_size.z, block_size.x, block_size.y, block_size.z, 0, 0, params, nullptr)); // The two zeros are the dynamic shared-memory byte count and the stream handle; the trailing nullptr is the unused `extra` option list. NOTE(review): throw_on_error presumably raises on a non-success CUresult — confirm against its definition.
+}
+
 }