Skip to content

Commit

Permalink
Use the new MLX kernels to handle the BF16 matmul. (huggingface#2470)
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentMazare authored Sep 11, 2024
1 parent 5635650 commit afb6575
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 26 deletions.
4 changes: 2 additions & 2 deletions candle-core/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ impl Device {

pub fn supports_bf16(&self) -> bool {
match self {
Self::Cuda(_) => true,
Self::Metal(_) | Self::Cpu => false,
Self::Cuda(_) | Self::Metal(_) => true,
Self::Cpu => false,
}
}

Expand Down
68 changes: 44 additions & 24 deletions candle-core/src/metal_backend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1398,6 +1398,7 @@ impl BackendStorage for MetalStorage {
.map_err(MetalError::from)?;
Ok(acc)
}

fn matmul(
&self,
rhs: &Self,
Expand All @@ -1406,32 +1407,51 @@ impl BackendStorage for MetalStorage {
rhs_l: &Layout,
) -> Result<Self> {
let buffer = self.device.new_buffer(b * m * n, self.dtype, "matmul")?;
let name = match self.dtype {
DType::F32 => "sgemm",
DType::F16 => "hgemm",
DType::BF16 => "bgemm",
dtype => {
return Err(MetalError::Message(format!("matmul doesn't support {dtype:?}")).into())
}
};

let command_buffer = self.device.command_buffer()?;
command_buffer.set_label("matmul");
candle_metal_kernels::call_gemm(
&self.device.device,
&command_buffer,
&self.device.kernels,
name,
(b, m, n, k),
lhs_l.stride(),
lhs_l.start_offset() * self.dtype.size_in_bytes(),
&self.buffer,
rhs_l.stride(),
rhs_l.start_offset() * rhs.dtype.size_in_bytes(),
&rhs.buffer,
&buffer,
)
.map_err(MetalError::from)?;
if self.dtype == DType::BF16 {
candle_metal_kernels::call_mlx_gemm(
&self.device.device,
&command_buffer,
&self.device.kernels,
candle_metal_kernels::GemmDType::BF16,
(b, m, n, k),
lhs_l.stride(),
lhs_l.start_offset() * self.dtype.size_in_bytes(),
&self.buffer,
rhs_l.stride(),
rhs_l.start_offset() * rhs.dtype.size_in_bytes(),
&rhs.buffer,
&buffer,
)
.map_err(MetalError::from)?;
} else {
let name = match self.dtype {
DType::F32 => "sgemm",
DType::F16 => "hgemm",
dtype => {
return Err(
MetalError::Message(format!("matmul doesn't support {dtype:?}")).into(),
)
}
};

candle_metal_kernels::call_gemm(
&self.device.device,
&command_buffer,
&self.device.kernels,
name,
(b, m, n, k),
lhs_l.stride(),
lhs_l.start_offset() * self.dtype.size_in_bytes(),
&self.buffer,
rhs_l.stride(),
rhs_l.start_offset() * rhs.dtype.size_in_bytes(),
&rhs.buffer,
&buffer,
)
.map_err(MetalError::from)?;
}
Ok(Self::new(
buffer,
self.device.clone(),
Expand Down

0 comments on commit afb6575

Please sign in to comment.