Skip to content

Commit

Permalink
Launch kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed Jan 19, 2020
1 parent f37beda commit 8f21d43
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
42 changes: 39 additions & 3 deletions example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,48 @@ int main ()
// Test comment
out[tid] = a * x[tid] + y[tid];
}
}, (float, a), (const float *, x), (const float *, y), (float *, out));
}, (int, a), (const int *, x), (const int *, y), (int *, out));

size_t n = 1024;

nlohmann::json data;
data["N"] = 1024;
data["N"] = n;

int a = 2;
int *x {};
int *y {};
int *out {};

cudaMalloc (&x, n * sizeof (int));
cudaMalloc (&y, n * sizeof (int));
cudaMalloc (&out, n * sizeof (int));

std::unique_ptr<int[]> h_x (new int[n]);
std::unique_ptr<int[]> h_y (new int[n]);
std::unique_ptr<int[]> h_out (new int[n]);

for (size_t i = 0; i < n; i++)
{
h_x[i] = 1;
h_y[i] = 2;
}

cudaMemcpy (x, h_x.get (), n * sizeof (float), cudaMemcpyHostToDevice);
cudaMemcpy (y, h_y.get (), n * sizeof (float), cudaMemcpyHostToDevice);

auto saxpy_kernel = saxpy.compile (data);
saxpy_kernel.launch (1, 1024, a, x, y, out);

cudaMemcpy (h_out.get (), out, n * sizeof (float), cudaMemcpyDeviceToHost);

for (size_t i = 0; i < n; i++)
{
int target_value = a * h_x[i] + h_y[i];
if (target_value != h_out[i])
std::cerr << "Error in out[" << i << "] = " << h_out[i] << " != " << target_value << "\n";
}

cudaDeviceSynchronize ();
cudaFree (x);
cudaFree (y);
cudaFree (out);
}
7 changes: 5 additions & 2 deletions include/cuda_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class kernel_base
~kernel_base ();

protected:
void launch_base (dim3 grid_size, dim3 block_size);
void launch_base (dim3 grid_size, dim3 block_size, void **params);

private:
std::unique_ptr<char[]> ptx;
Expand All @@ -38,6 +38,8 @@ class kernel_base
template<typename... args_types>
class kernel : public kernel_base
{
std::vector<void*> params;

public:
kernel (kernel &&) = default;
explicit kernel (const std::string &kernel_name, std::unique_ptr<char[]> ptx_arg)
Expand All @@ -46,7 +48,8 @@ class kernel : public kernel_base

// Launches with the given grid and block shape, forwarding the kernel
// arguments to kernel_base::launch_base.
//
// grid_size  - grid dimensions passed through to launch_base.
// block_size - block dimensions passed through to launch_base.
// args       - kernel arguments, taken by value; launch_base receives an
//              array holding the address of each one, in declaration order.
void launch (dim3 grid_size, dim3 block_size, args_types... args)
{
  // The recorded addresses refer to this call's own parameter copies, so the
  // contents of `params` are only meaningful until this function returns.
  params.assign ({ &args... });

  void **argument_slots = params.data ();
  kernel_base::launch_base (grid_size, block_size, argument_slots);
}
};

Expand Down
5 changes: 5 additions & 0 deletions src/cuda_jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,9 @@ std::unique_ptr<char[]> cuda_jit_base::compile_base (const nlohmann::json &json)
return std::move (ptx);
}

// Launches impl->kernel_fn through the CUDA driver API.
//
// grid_size  - x/y/z extents of the grid.
// block_size - x/y/z extents of each block.
// params     - array of pointers to the kernel arguments, forwarded
//              unchanged as cuLaunchKernel's kernelParams.
//
// The status returned by cuLaunchKernel is handed to throw_on_error
// (presumably raises on a non-success code; defined outside this view).
void kernel_base::launch_base (dim3 grid_size, dim3 block_size, void **params)
{
  throw_on_error (
    cuLaunchKernel (
      impl->kernel_fn,
      grid_size.x, grid_size.y, grid_size.z,    // grid dimensions
      block_size.x, block_size.y, block_size.z, // block dimensions
      0,                                        // dynamic shared memory bytes
      0,                                        // stream handle (0 / null stream)
      params,                                   // kernelParams
      nullptr));                                // extra (unused)
}

}

0 comments on commit 8f21d43

Please sign in to comment.