Skip to content

Commit

Permalink
Explicitly unroll integer pow for small exponents
Browse files Browse the repository at this point in the history
The newly optimized loop has introduced a regression in the case
when pow is called with a small constant exponent. LLVM is no longer
able to unroll the loop and the generated code is larger and slower
than what's expected in tests.

Match and handle small exponent values separately by branching out
to an explicit multiplication sequence for that exponent.
Powers larger than 6 need more than three multiplications, so these
cases are less likely to benefit from this optimization, also such
constant exponents are less likely to be used in practice.
For uses with a non-constant exponent, this might also provide
a performance benefit if the exponent is small and does not vary
between successive calls, so the same match arm tends to be taken as
a predicted branch.
  • Loading branch information
mzabaluev committed Jul 11, 2024
1 parent 4cfe24a commit 05ee322
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 12 deletions.
62 changes: 56 additions & 6 deletions core/src/num/int_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2173,10 +2173,35 @@ macro_rules! int_impl {
without modifying the original"]
#[inline]
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
if exp == 0 {
return 1;
}
let mut base = self;

// Unroll multiplications for small exponent values.
// This gives the optimizer a way to efficiently inline call sites
// for the most common use cases with constant exponents.
// Currently, LLVM is unable to unroll the loop below.
match exp {
0 => return 1,
1 => return base,
2 => return base.wrapping_mul(base),
3 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(base);
}
4 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(squared);
}
5 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(squared).wrapping_mul(base);
}
6 => {
let cubed = base.wrapping_mul(base).wrapping_mul(base);
return cubed.wrapping_mul(cubed);
}
_ => {}
}

let mut acc: Self = 1;

loop {
Expand Down Expand Up @@ -2719,10 +2744,35 @@ macro_rules! int_impl {
#[inline]
#[rustc_inherit_overflow_checks]
pub const fn pow(self, mut exp: u32) -> Self {
if exp == 0 {
return 1;
}
let mut base = self;

// Unroll multiplications for small exponent values.
// This gives the optimizer a way to efficiently inline call sites
// for the most common use cases with constant exponents.
// Currently, LLVM is unable to unroll the loop below.
match exp {
0 => return 1,
1 => return base,
2 => return base * base,
3 => {
let squared = base * base;
return squared * base;
}
4 => {
let squared = base * base;
return squared * squared;
}
5 => {
let squared = base * base;
return squared * squared * base;
}
6 => {
let cubed = base * base * base;
return cubed * cubed;
}
_ => {}
}

let mut acc = 1;

loop {
Expand Down
62 changes: 56 additions & 6 deletions core/src/num/uint_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2049,10 +2049,35 @@ macro_rules! uint_impl {
without modifying the original"]
#[inline]
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
if exp == 0 {
return 1;
}
let mut base = self;

// Unroll multiplications for small exponent values.
// This gives the optimizer a way to efficiently inline call sites
// for the most common use cases with constant exponents.
// Currently, LLVM is unable to unroll the loop below.
match exp {
0 => return 1,
1 => return base,
2 => return base.wrapping_mul(base),
3 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(base);
}
4 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(squared);
}
5 => {
let squared = base.wrapping_mul(base);
return squared.wrapping_mul(squared).wrapping_mul(base);
}
6 => {
let cubed = base.wrapping_mul(base).wrapping_mul(base);
return cubed.wrapping_mul(cubed);
}
_ => {}
}

let mut acc: Self = 1;

loop {
Expand Down Expand Up @@ -2544,10 +2569,35 @@ macro_rules! uint_impl {
#[inline]
#[rustc_inherit_overflow_checks]
pub const fn pow(self, mut exp: u32) -> Self {
if exp == 0 {
return 1;
}
let mut base = self;

// Unroll multiplications for small exponent values.
// This gives the optimizer a way to efficiently inline call sites
// for the most common use cases with constant exponents.
// Currently, LLVM is unable to unroll the loop below.
match exp {
0 => return 1,
1 => return base,
2 => return base * base,
3 => {
let squared = base * base;
return squared * base;
}
4 => {
let squared = base * base;
return squared * squared;
}
5 => {
let squared = base * base;
return squared * squared * base;
}
6 => {
let cubed = base * base * base;
return cubed * cubed;
}
_ => {}
}

let mut acc = 1;

loop {
Expand Down

0 comments on commit 05ee322

Please sign in to comment.