Skip to content

Commit

Permalink
Hook the MLX matmul kernels in candle-core. (huggingface#2473)
Browse files Browse the repository at this point in the history
LaurentMazare authored Sep 12, 2024
1 parent 0cb0bd1 commit 72d6490
Showing 2 changed files with 38 additions and 0 deletions.
6 changes: 6 additions & 0 deletions candle-core/src/metal_backend/device.rs
Original file line number Diff line number Diff line change
@@ -70,6 +70,8 @@ pub struct MetalDevice {
pub(crate) buffers: AllocatedBuffers,
/// Seed for random number generation.
pub(crate) seed: Arc<Mutex<Buffer>>,
/// Whether to use the MLX matmul kernels instead of the MFA ones.
pub(crate) use_mlx_mm: bool,
}

impl std::fmt::Debug for MetalDevice {
@@ -87,6 +89,10 @@ impl std::ops::Deref for MetalDevice {
}

impl MetalDevice {
/// Overrides the matmul kernel selection flag for this device.
///
/// The given value is stored in the `use_mlx_mm` field, which controls
/// whether the MLX matmul kernels are used instead of the MFA ones.
pub fn set_use_mlx_mm(&mut self, use_mlx_mm: bool) {
    self.use_mlx_mm = use_mlx_mm;
}

/// Returns the value of this device's `id` field.
pub fn id(&self) -> DeviceId {
self.id
}
32 changes: 32 additions & 0 deletions candle-core/src/metal_backend/mod.rs
Original file line number Diff line number Diff line change
@@ -1425,6 +1425,33 @@ impl BackendStorage for MetalStorage {
&buffer,
)
.map_err(MetalError::from)?;
} else if self.device.use_mlx_mm {
let dtype = match self.dtype {
DType::F32 => candle_metal_kernels::GemmDType::F32,
DType::F16 => candle_metal_kernels::GemmDType::F16,
DType::BF16 => candle_metal_kernels::GemmDType::BF16,
dtype => {
return Err(MetalError::Message(format!(
"mlx matmul doesn't support {dtype:?}"
))
.into())
}
};
candle_metal_kernels::call_mlx_gemm(
&self.device.device,
&command_buffer,
&self.device.kernels,
dtype,
(b, m, n, k),
lhs_l.stride(),
lhs_l.start_offset() * self.dtype.size_in_bytes(),
&self.buffer,
rhs_l.stride(),
rhs_l.start_offset() * rhs.dtype.size_in_bytes(),
&rhs.buffer,
&buffer,
)
.map_err(MetalError::from)?;
} else {
let name = match self.dtype {
DType::F32 => "sgemm",
@@ -1818,6 +1845,10 @@ impl BackendDevice for MetalDevice {
let command_buffer_index = Arc::new(RwLock::new(0));
let kernels = Arc::new(Kernels::new());
let buffers = Arc::new(RwLock::new(HashMap::new()));
let use_mlx_mm = match std::env::var("CANDLE_USE_MLX_MM").as_deref() {
Ok("false") | Ok("False") | Ok("FALSE") | Ok("0") | Err(_) => false,
Ok(_) => true,
};
let compute_per_buffer = match std::env::var("CANDLE_METAL_COMPUTE_PER_BUFFER") {
Ok(val) => val.parse()?,
_ => 50,
@@ -1837,6 +1868,7 @@ impl BackendDevice for MetalDevice {
buffers,
kernels,
seed,
use_mlx_mm,
})
}

0 comments on commit 72d6490

Please sign in to comment.