Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add split_tensor_along method #917

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
avoid conv1d bound for cudnn
  • Loading branch information
swfsql committed Feb 9, 2024
commit 4e3f7c7a24728668f72cf3617a66f4476280f6fb
50 changes: 39 additions & 11 deletions dfdx-core/src/tensor_ops/utilities/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,33 +114,61 @@ pub trait Device<E: Dtype>:
+ crate::tensor_ops::axpy::AxpyKernel<E>

// conv1d
+ super::super::conv1d::Conv1DKernel<E>
+ NonCudnnCuda<E>
{
}

#[cfg(feature = "cudnn")]
pub trait NonCudnnCuda<E: Dtype> {}

#[cfg(not(feature = "cudnn"))]
pub trait NonCudnnCuda<E: Dtype>:
// conv1d
super::super::conv1d::Conv1DKernel<E>
{
}

#[cfg(feature = "f16")]
impl Device<f16> for crate::tensor::Cpu {}
#[cfg(feature = "f16")]
impl Device<AMP<f16>> for crate::tensor::Cpu {}
mod f16_ {
use super::*;
impl Device<f16> for crate::tensor::Cpu {}
impl NonCudnnCuda<f16> for crate::tensor::Cpu {}
impl Device<AMP<f16>> for crate::tensor::Cpu {}
impl NonCudnnCuda<AMP<f16>> for crate::tensor::Cpu {}
}
impl Device<f32> for crate::tensor::Cpu {}
impl NonCudnnCuda<f32> for crate::tensor::Cpu {}
impl Device<f64> for crate::tensor::Cpu {}
impl NonCudnnCuda<f64> for crate::tensor::Cpu {}

#[cfg(all(feature = "cuda", feature = "f16"))]
impl Device<f16> for crate::tensor::Cuda {}
#[cfg(all(feature = "cuda", feature = "f16"))]
impl Device<AMP<f16>> for crate::tensor::Cuda {}
#[cfg(feature = "cuda")]
impl Device<f32> for crate::tensor::Cuda {}
mod cuda_f16 {
use super::*;
impl Device<f16> for crate::tensor::Cuda {}
impl NonCudnnCuda<f16> for crate::tensor::Cuda {}
impl Device<AMP<f16>> for crate::tensor::Cuda {}
impl NonCudnnCuda<AMP<f16>> for crate::tensor::Cuda {}
}
#[cfg(feature = "cuda")]
impl Device<f64> for crate::tensor::Cuda {}
mod cuda {
use super::*;
impl Device<f32> for crate::tensor::Cuda {}
impl NonCudnnCuda<f32> for crate::tensor::Cuda {}
impl Device<f64> for crate::tensor::Cuda {}
impl NonCudnnCuda<f64> for crate::tensor::Cuda {}
}

// TODO: How can we implement this for f16 when WGSL doesn't support f16 yet?
// #[cfg(all(feature = "webgpu", feature = "f16"))]
// impl Device<f16> for crate::tensor::Webgpu {}
// #[cfg(all(feature = "webgpu", feature = "f16"))]
// impl Device<AMP<f16>> for crate::tensor::Webgpu {}
#[cfg(feature = "webgpu")]
impl Device<f32> for crate::tensor::Webgpu {}
mod webgpu {
use super::*;
impl Device<f32> for crate::tensor::Webgpu {}
impl NonCudnnCuda<f32> for crate::tensor::Webgpu {}
}

// TODO: How can we implement this for f64 when WGSL doesn't support f64 yet?
// #[cfg(feature = "webgpu")]
Expand Down