Use num-traits for math ops instead of always using libm (for std and no-std) #1584

Merged
1 commit merged on Apr 8, 2024
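For context, the change follows one pattern across all the files below: the direct `libm` calls are dropped in favor of plain method-call syntax such as `(x).sqrt()`. With the `std` feature enabled these resolve to the inherent `f32`/`f64` methods; in `no_std` builds the `num_traits::Float` trait is imported so the same calls resolve through `num-traits` (which can fall back to `libm` internally). A minimal sketch of the pattern, assuming `num-traits` is a dependency with the features this PR configures in `Cargo.toml`; the `inv_sqrt_scale` helper is hypothetical, not code from this PR:

```rust
// Bring the Float trait into scope only when `std` is disabled, so that
// `.sqrt()` resolves via num-traits instead of the (absent) std methods.
#[cfg(not(feature = "std"))]
use num_traits::Float;

/// Hypothetical helper illustrating the pattern used throughout this PR:
/// the same method-call syntax compiles in both std and no-std builds.
fn inv_sqrt_scale(d_k: usize) -> f32 {
    1.0 / (d_k as f32).sqrt()
}
```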
6 changes: 1 addition & 5 deletions Cargo.lock

Some generated files are not rendered by default.

7 changes: 4 additions & 3 deletions crates/burn-core/Cargo.toml
@@ -37,6 +37,7 @@ std = [
"rmp-serde",
"serde/std",
"serde_json/std",
"num-traits/std",
]
doc = [
"std",
@@ -86,7 +87,7 @@ candle = ["burn-candle"]
wgpu = ["burn-wgpu"]

# Custom deserializer for Record that is helpful for importing data, such as PyTorch pt files.
record-item-custom-serde = ["thiserror", "regex", "num-traits"]
record-item-custom-serde = ["thiserror", "regex"]

# Serialization formats
experimental-named-tensor = ["burn-tensor/experimental-named-tensor"]
@@ -111,9 +112,9 @@ burn-tch = { path = "../burn-tch", version = "0.13.0", optional = true }
burn-candle = { path = "../burn-candle", version = "0.13.0", optional = true }

derive-new = { workspace = true }
libm = { workspace = true }
log = { workspace = true, optional = true }
rand = { workspace = true, features = ["std_rng"] } # Default enables std

# Using in place of use std::sync::Mutex when std is disabled
spin = { workspace = true, features = ["mutex", "spin_mutex"] }

@@ -130,7 +131,7 @@ rmp-serde = { workspace = true, optional = true }
serde_json = { workspace = true, features = ["alloc"] } #Default enables std
thiserror = { workspace = true, optional = true }
regex = { workspace = true, optional = true }
num-traits = { workspace = true, optional = true }
num-traits = { workspace = true }

[dev-dependencies]
tempfile = { workspace = true }
8 changes: 5 additions & 3 deletions crates/burn-core/src/nn/attention/mha.rs
@@ -8,7 +8,9 @@ use crate::{
nn,
tensor::{activation, backend::Backend, Bool, Tensor},
};
use libm::sqrtf;

#[cfg(not(feature = "std"))]
use num_traits::Float;

/// Configuration to create a [Multi Head Attention](MultiHeadAttention) layer.
#[derive(Config)]
@@ -35,7 +37,7 @@ pub struct MultiHeadAttentionConfig {
quiet_softmax: bool,
/// The type of function used to initialize neural network parameters
#[config(
default = "Initializer::KaimingUniform{gain:1.0/libm::sqrt(3.0), fan_out_only:false}"
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
)]
pub initializer: Initializer,
}
@@ -207,7 +209,7 @@ impl<B: Backend> MultiHeadAttention<B> {
fn attn_scores(&self, query: Tensor<B, 4>, key: Tensor<B, 4>) -> Tensor<B, 4> {
let attn_scores = query
.matmul(key.transpose())
.div_scalar(sqrtf(self.d_k as f32));
.div_scalar((self.d_k as f32).sqrt());

self.dropout.forward(attn_scores)
}
7 changes: 4 additions & 3 deletions crates/burn-core/src/nn/conv/conv1d.rs
@@ -8,7 +8,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv1d;
use burn_tensor::ops::ConvOptions;
use libm::sqrt;

use super::checks;

@@ -37,7 +36,9 @@ pub struct Conv1dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

@@ -132,7 +133,7 @@ mod tests {

let config = Conv1dConfig::new(5, 5, 5);
let k = (config.channels_in * config.kernel_size) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
11 changes: 6 additions & 5 deletions crates/burn-core/src/nn/conv/conv2d.rs
@@ -9,7 +9,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv2d;
use burn_tensor::ops::ConvOptions;
use libm::sqrt;

use super::checks;

@@ -36,7 +35,9 @@ pub struct Conv2dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

@@ -135,7 +136,7 @@ mod tests {

let config = Conv2dConfig::new([5, 1], [5, 5]);
let k = (config.channels[0] * config.kernel_size[0] * config.kernel_size[1]) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let device = Default::default();
let conv = config.init::<TestBackend>(&device);

@@ -161,7 +162,7 @@
TestBackend::seed(0);

let init = Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: true, // test that fan_out is passed to `init_with()`
};
let device = Default::default();
@@ -176,7 +177,7 @@
TestBackend::seed(0);

let init = Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: true,
};
let device = Default::default();
7 changes: 4 additions & 3 deletions crates/burn-core/src/nn/conv/conv_transpose1d.rs
@@ -8,7 +8,6 @@ use crate::tensor::backend::Backend;
use crate::tensor::Tensor;
use burn_tensor::module::conv_transpose1d;
use burn_tensor::ops::ConvTransposeOptions;
use libm::sqrt;

use super::checks;

@@ -38,7 +37,9 @@ pub struct ConvTranspose1dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

@@ -135,7 +136,7 @@ mod tests {

let config = ConvTranspose1dConfig::new([5, 1], 5);
let k = (config.channels[1] * config.kernel_size) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
11 changes: 6 additions & 5 deletions crates/burn-core/src/nn/conv/conv_transpose2d.rs
@@ -1,16 +1,15 @@
use crate as burn;

use super::checks;
use crate::config::Config;
use crate::module::Module;
use crate::module::Param;
use crate::nn::Initializer;
use crate::tensor::backend::Backend;
use crate::tensor::Tensor;

use burn_tensor::module::conv_transpose2d;
use burn_tensor::ops::ConvTransposeOptions;
use libm::sqrt;

use super::checks;

/// Configuration to create an [2D transposed convolution](ConvTranspose2d) layer.
#[derive(Config, Debug)]
@@ -38,7 +37,9 @@ pub struct ConvTranspose2dConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0),fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0),fan_out_only:false}"
)]
pub initializer: Initializer,
}

@@ -136,7 +137,7 @@ mod tests {

let config = ConvTranspose2dConfig::new([5, 1], [5, 5]);
let k = (config.channels[1] * config.kernel_size[0] * config.kernel_size[1]) as f64;
let k = sqrt(config.groups as f64 / k) as f32;
let k = (config.groups as f64 / k).sqrt() as f32;
let conv = config.init::<TestBackend>(&Default::default());

conv.weight.to_data().assert_within_range(-k..k);
25 changes: 14 additions & 11 deletions crates/burn-core/src/nn/initializer.rs
@@ -1,5 +1,4 @@
use burn_tensor::Shape;
use libm::sqrt;

use crate::config::Config;
use crate::module::{Param, ParamId};
@@ -8,6 +7,9 @@ use crate::tensor::{Distribution, Tensor};

use crate as burn;

#[cfg(not(feature = "std"))]
use num_traits::Float;

/// Enum specifying with what values a tensor should be initialized
#[derive(Config, Debug, PartialEq)]
pub enum Initializer {
@@ -129,15 +131,15 @@ impl Initializer {
Initializer::Uniform { min, max } => uniform_draw(shape, *min, *max, device),
Initializer::Normal { mean, std } => normal_draw(shape, *mean, *std, device),
Initializer::KaimingUniform { gain, fan_out_only } => {
let a = sqrt(3.0) * *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
let a = 3.0f64.sqrt() * *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
uniform_draw(shape, -a, a, device)
}
Initializer::KaimingNormal { gain, fan_out_only } => {
let std = *gain * self.kaiming_std(*fan_out_only, fan_in, fan_out);
normal_draw(shape, 0.0, std, device)
}
Initializer::XavierUniform { gain } => {
let a = sqrt(3.0) * *gain * self.xavier_std(fan_in, fan_out);
let a = 3.0f64.sqrt() * *gain * self.xavier_std(fan_in, fan_out);
uniform_draw(shape, -a, a, device)
}
Initializer::XavierNormal { gain } => {
@@ -158,7 +160,7 @@ impl Initializer {
"Can't use Kaiming initialization without specifying fan. Use init_with method.",
);

1.0 / sqrt(fan as f64)
1.0 / (fan as f64).sqrt()
}

fn xavier_std(&self, fan_in: Option<usize>, fan_out: Option<usize>) -> f64 {
@@ -170,7 +172,7 @@
"Can't use Xavier initialization without specifying fan out. Use init_with method and \
provide fan_out.",
);
sqrt(2.0 / (fan_in + fan_out) as f64)
(2.0 / (fan_in + fan_out) as f64).sqrt()
}
}

@@ -199,6 +201,7 @@ mod tests {
use super::*;

use burn_tensor::{Data, ElementConversion};
use num_traits::Pow;

pub type TB = burn_ndarray::NdArray<f32>;

@@ -293,7 +296,7 @@

let gain = 2_f64;
let (fan_in, fan_out) = (5, 6);
let k = gain * sqrt(3.0 / fan_in as f64);
let k = gain * (3.0 / fan_in as f64).sqrt();

let tensor: Tensor<TB, 2> = Initializer::KaimingUniform {
gain,
@@ -312,7 +315,7 @@
let (fan_in, fan_out) = (1000, 10);
let expected_mean = 0_f64;

let expected_var = (gain * sqrt(1. / (fan_in as f64))).powf(2.);
let expected_var = (gain * (1. / (fan_in as f64)).sqrt()).pow(2.);
let tensor: Tensor<TB, 2> = Initializer::KaimingNormal {
gain,
fan_out_only: false,
@@ -329,7 +332,7 @@
let gain = 2_f64;
let shape = [3];
let fan_in = 5;
let k = gain * sqrt(3.0 / fan_in as f64);
let k = gain * (3.0 / fan_in as f64).sqrt();

let tensor: Tensor<TB, 1> = Initializer::KaimingUniform {
gain,
@@ -346,7 +349,7 @@

let gain = 2_f64;
let (fan_in, fan_out) = (5, 6);
let k = gain * sqrt(3.0 / fan_out as f64);
let k = gain * (3.0 / fan_out as f64).sqrt();

let tensor: Tensor<TB, 2> = Initializer::KaimingUniform {
gain,
@@ -379,7 +382,7 @@

let gain = 2.;
let (fan_in, fan_out) = (5, 6);
let bound = gain * sqrt(6. / (fan_in + fan_out) as f64);
let bound = gain * (6. / (fan_in + fan_out) as f64).sqrt();
let tensor: Tensor<TB, 2> = Initializer::XavierUniform { gain }
.init_with(
[fan_out, fan_in],
@@ -400,7 +403,7 @@
let (fan_in, fan_out) = (1000, 10);
let expected_mean = 0_f64;

let expected_var = (gain * sqrt(2. / (fan_in as f64 + fan_out as f64))).powf(2.);
let expected_var = (gain * (2. / (fan_in as f64 + fan_out as f64)).sqrt()).powf(2.);
let tensor: Tensor<TB, 2> = Initializer::XavierNormal { gain }
.init_with(
[fan_out, fan_in],
10 changes: 5 additions & 5 deletions crates/burn-core/src/nn/linear.rs
@@ -4,7 +4,6 @@ use crate::config::Config;
use crate::module::Module;
use crate::module::Param;
use crate::tensor::{backend::Backend, Tensor};
use libm::sqrt;

use super::Initializer;

@@ -19,7 +18,9 @@ pub struct LinearConfig {
#[config(default = true)]
pub bias: bool,
/// The type of function used to initialize neural network parameters
#[config(default = "Initializer::KaimingUniform{gain:1.0/sqrt(3.0), fan_out_only:false}")]
#[config(
default = "Initializer::KaimingUniform{gain:1.0/num_traits::Float::sqrt(3.0), fan_out_only:false}"
)]
pub initializer: Initializer,
}

@@ -80,21 +81,20 @@ mod tests {
use super::*;
use crate::TestBackend;
use burn_tensor::{Data, Shape};
use libm::sqrt;

#[test]
fn initializer_default() {
TestBackend::seed(0);

let config = LinearConfig::new(5, 5);
let k = sqrt(1.0 / config.d_input as f64) as f32;
let k = (1.0 / config.d_input as f64).sqrt() as f32;
let device = Default::default();
let linear = config.init::<TestBackend>(&device);

assert_eq!(
config.initializer,
Initializer::KaimingUniform {
gain: 1.0 / sqrt(3.0),
gain: 1.0 / 3.0f64.sqrt(),
fan_out_only: false
}
);