Skip to content

Commit

Permalink
cranelift: Drop unused arguments before regalloc
Browse files Browse the repository at this point in the history
Before this, Cranelift ABI code would emit a stack-load instruction for
every stack argument and add all register arguments to the `args`
pseudo-instruction, whether those arguments were used or not.

However, we already know which arguments are used at that point because
we need the analysis for load-sinking, so it's easy to filter the unused
arguments out.

This avoids generating loads that are immediately dead, which is good
for the generated code. It also slightly reduces the size of the
register allocation problem, which is a small win in compile time.

This also changes which registers RA2 chooses in some cases because it
no longer considers unused defs from the `args` pseudo-instruction.

There was an existing method named `arg_is_needed_in_body` which sounded
like it should be the right place to implement this. However, that
method was only used for Baldrdash integration and has been a stub since
that integration was removed in bytecodealliance#4571. Also, it didn't have access to the
`value_ir_uses` map needed here. But the place where that method was
called does have access to that map and was perfect for this.

Thanks to @elliottt for doing the initial investigation of this change
with me.
  • Loading branch information
jameysharp committed Apr 22, 2024
1 parent 1fa8de1 commit d3c3011
Show file tree
Hide file tree
Showing 428 changed files with 6,371 additions and 6,784 deletions.
7 changes: 0 additions & 7 deletions cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1597,13 +1597,6 @@ impl<M: ABIMachineSpec> Callee<M> {
insts
}

/// Is the given argument needed in the body (as opposed to, e.g., serving
/// only as a special ABI-specific placeholder)? This controls whether
/// lowering will copy it to a virtual reg used by CLIF instructions.
///
/// The index `_idx` is ignored: this implementation unconditionally
/// returns `true`, so every argument is reported as needed.
pub fn arg_is_needed_in_body(&self, _idx: usize) -> bool {
true
}

/// Generate an instruction which copies a source register to a return value slot.
pub fn gen_copy_regs_to_retval(
&self,
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/machinst/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}

for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
if !self.vcode.abi().arg_is_needed_in_body(i) {
if self.value_ir_uses[*param] == ValueUseState::Unused {
continue;
}
let regs = writable_value_regs(self.value_regs[*param]);
Expand Down
48 changes: 24 additions & 24 deletions cranelift/filetests/filetests/isa/aarch64/amodes.clif
Original file line number Diff line number Diff line change
Expand Up @@ -287,16 +287,16 @@ block0(v0: i64, v1: i64, v2: i64):

; VCode:
; block0:
; movn w6, #4097
; mov w6, w6
; ldrsh x0, [x6]
; movn w2, #4097
; mov w2, w2
; ldrsh x0, [x2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w6, #-0x1002
; mov w6, w6
; ldrsh x0, [x6] ; trap: heap_oob
; mov w2, #-0x1002
; mov w2, w2
; ldrsh x0, [x2] ; trap: heap_oob
; ret

function %f19(i64, i64, i64) -> i32 {
Expand All @@ -309,16 +309,16 @@ block0(v0: i64, v1: i64, v2: i64):

; VCode:
; block0:
; movz w6, #4098
; mov w6, w6
; ldrsh x0, [x6]
; movz w2, #4098
; mov w2, w2
; ldrsh x0, [x2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w6, #0x1002
; mov w6, w6
; ldrsh x0, [x6] ; trap: heap_oob
; mov w2, #0x1002
; mov w2, w2
; ldrsh x0, [x2] ; trap: heap_oob
; ret

function %f20(i64, i64, i64) -> i32 {
Expand All @@ -331,16 +331,16 @@ block0(v0: i64, v1: i64, v2: i64):

; VCode:
; block0:
; movn w6, #4097
; sxtw x6, w6
; ldrsh x0, [x6]
; movn w2, #4097
; sxtw x2, w2
; ldrsh x0, [x2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w6, #-0x1002
; sxtw x6, w6
; ldrsh x0, [x6] ; trap: heap_oob
; mov w2, #-0x1002
; sxtw x2, w2
; ldrsh x0, [x2] ; trap: heap_oob
; ret

function %f21(i64, i64, i64) -> i32 {
Expand All @@ -353,16 +353,16 @@ block0(v0: i64, v1: i64, v2: i64):

; VCode:
; block0:
; movz w6, #4098
; sxtw x6, w6
; ldrsh x0, [x6]
; movz w2, #4098
; sxtw x2, w2
; ldrsh x0, [x2]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w6, #0x1002
; sxtw x6, w6
; ldrsh x0, [x6] ; trap: heap_oob
; mov w2, #0x1002
; sxtw x2, w2
; ldrsh x0, [x2] ; trap: heap_oob
; ret

function %i128(i64) -> i128 {
Expand Down
8 changes: 4 additions & 4 deletions cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -542,14 +542,14 @@ block0(v0: i64):

; VCode:
; block0:
; movz x3, #1
; sub x0, xzr, x3
; movz x1, #1
; sub x0, xzr, x1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov x3, #1
; neg x0, x3
; mov x1, #1
; neg x0, x1
; ret

function %f30(i8x16) -> i8x16 {
Expand Down
116 changes: 0 additions & 116 deletions cranelift/filetests/filetests/isa/aarch64/return-call.clif
Original file line number Diff line number Diff line change
Expand Up @@ -176,25 +176,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; ldr x7, [sp, #16]
; ldr x9, [sp, #24]
; ldr x11, [sp, #32]
; ldr x13, [sp, #40]
; ldr x15, [sp, #48]
; ldr x1, [sp, #56]
; ldr x3, [sp, #64]
; ldr x5, [sp, #72]
; ldr x7, [sp, #80]
; ldr x9, [sp, #88]
; ldr x11, [sp, #96]
; ldr x13, [sp, #104]
; ldr x15, [sp, #112]
; ldr x1, [sp, #120]
; ldr x3, [sp, #128]
; ldr x5, [sp, #136]
; ldr x7, [sp, #144]
; ldr x9, [sp, #152]
; ldr x11, [sp, #160]
; ldr x2, [sp, #168]
; ldp fp, lr, [sp], #16
; add sp, sp, #160
Expand All @@ -205,25 +186,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; block1: ; offset 0x8
; ldur x7, [sp, #0x10]
; ldur x9, [sp, #0x18]
; ldur x11, [sp, #0x20]
; ldur x13, [sp, #0x28]
; ldur x15, [sp, #0x30]
; ldur x1, [sp, #0x38]
; ldur x3, [sp, #0x40]
; ldur x5, [sp, #0x48]
; ldur x7, [sp, #0x50]
; ldur x9, [sp, #0x58]
; ldur x11, [sp, #0x60]
; ldur x13, [sp, #0x68]
; ldur x15, [sp, #0x70]
; ldur x1, [sp, #0x78]
; ldur x3, [sp, #0x80]
; ldur x5, [sp, #0x88]
; ldur x7, [sp, #0x90]
; ldur x9, [sp, #0x98]
; ldur x11, [sp, #0xa0]
; ldur x2, [sp, #0xa8]
; ldp x29, x30, [sp], #0x10
; add sp, sp, #0xa0
Expand Down Expand Up @@ -407,25 +369,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; ldr x7, [sp, #16]
; ldr x9, [sp, #24]
; ldr x11, [sp, #32]
; ldr x13, [sp, #40]
; ldr x15, [sp, #48]
; ldr x1, [sp, #56]
; ldr x3, [sp, #64]
; ldr x5, [sp, #72]
; ldr x7, [sp, #80]
; ldr x9, [sp, #88]
; ldr x11, [sp, #96]
; ldr x13, [sp, #104]
; ldr x15, [sp, #112]
; ldr x1, [sp, #120]
; ldr x3, [sp, #128]
; ldr x5, [sp, #136]
; ldr x7, [sp, #144]
; ldr x9, [sp, #152]
; ldr x11, [sp, #160]
; ldr x2, [sp, #168]
; ldp fp, lr, [sp], #16
; add sp, sp, #160
Expand All @@ -436,25 +379,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; block1: ; offset 0x8
; ldur x7, [sp, #0x10]
; ldur x9, [sp, #0x18]
; ldur x11, [sp, #0x20]
; ldur x13, [sp, #0x28]
; ldur x15, [sp, #0x30]
; ldur x1, [sp, #0x38]
; ldur x3, [sp, #0x40]
; ldur x5, [sp, #0x48]
; ldur x7, [sp, #0x50]
; ldur x9, [sp, #0x58]
; ldur x11, [sp, #0x60]
; ldur x13, [sp, #0x68]
; ldur x15, [sp, #0x70]
; ldur x1, [sp, #0x78]
; ldur x3, [sp, #0x80]
; ldur x5, [sp, #0x88]
; ldur x7, [sp, #0x90]
; ldur x9, [sp, #0x98]
; ldur x11, [sp, #0xa0]
; ldur x2, [sp, #0xa8]
; ldp x29, x30, [sp], #0x10
; add sp, sp, #0xa0
Expand All @@ -469,26 +393,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; block0:
; ldr x7, [sp, #16]
; ldr x9, [sp, #24]
; ldr x11, [sp, #32]
; ldr x13, [sp, #40]
; ldr x15, [sp, #48]
; ldr x1, [sp, #56]
; ldr x3, [sp, #64]
; ldr x5, [sp, #72]
; ldr x7, [sp, #80]
; ldr x9, [sp, #88]
; ldr x11, [sp, #96]
; ldr x13, [sp, #104]
; ldr x15, [sp, #112]
; ldr x1, [sp, #120]
; ldr x3, [sp, #128]
; ldr x5, [sp, #136]
; ldr x7, [sp, #144]
; ldr x9, [sp, #152]
; ldr x11, [sp, #160]
; ldr x13, [sp, #168]
; ldr x2, [sp, #176]
; ldp fp, lr, [sp], #16
; add sp, sp, #176
Expand All @@ -499,26 +403,6 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; block1: ; offset 0x8
; ldur x7, [sp, #0x10]
; ldur x9, [sp, #0x18]
; ldur x11, [sp, #0x20]
; ldur x13, [sp, #0x28]
; ldur x15, [sp, #0x30]
; ldur x1, [sp, #0x38]
; ldur x3, [sp, #0x40]
; ldur x5, [sp, #0x48]
; ldur x7, [sp, #0x50]
; ldur x9, [sp, #0x58]
; ldur x11, [sp, #0x60]
; ldur x13, [sp, #0x68]
; ldur x15, [sp, #0x70]
; ldur x1, [sp, #0x78]
; ldur x3, [sp, #0x80]
; ldur x5, [sp, #0x88]
; ldur x7, [sp, #0x90]
; ldur x9, [sp, #0x98]
; ldur x11, [sp, #0xa0]
; ldur x13, [sp, #0xa8]
; ldur x2, [sp, #0xb0]
; ldp x29, x30, [sp], #0x10
; add sp, sp, #0xb0
Expand Down
12 changes: 6 additions & 6 deletions cranelift/filetests/filetests/isa/aarch64/simd.clif
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ block0(v0: i64, v1: i64):

; VCode:
; block0:
; ldrb w5, [x0]
; dup v0.16b, w5
; dup v1.16b, w5
; ldrb w4, [x0]
; dup v0.16b, w4
; dup v1.16b, w4
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrb w5, [x0] ; trap: heap_oob
; dup v0.16b, w5
; dup v1.16b, w5
; ldrb w4, [x0] ; trap: heap_oob
; dup v0.16b, w4
; dup v1.16b, w4
; ret

function %f9() -> i32x2 {
Expand Down
16 changes: 8 additions & 8 deletions cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ block0(v0: i64):
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
; blr x2
; load_ext_name x0, TestCase(%foo)+0
; blr x0
; ldp fp, lr, [sp], #16
; ret
;
Expand All @@ -72,11 +72,11 @@ block0(v0: i64):
; cmp sp, x16
; b.lo #0x30
; block1: ; offset 0x14
; ldr x2, #0x1c
; ldr x0, #0x1c
; b #0x24
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
; blr x0
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf
Expand All @@ -101,8 +101,8 @@ block0(v0: i64):
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
; blr x2
; load_ext_name x0, TestCase(%foo)+0
; blr x0
; ldp fp, lr, [sp], #16
; ret
;
Expand All @@ -116,11 +116,11 @@ block0(v0: i64):
; cmp sp, x16
; b.lo #0x38
; block1: ; offset 0x1c
; ldr x2, #0x24
; ldr x0, #0x24
; b #0x2c
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
; blr x0
; ldp x29, x30, [sp], #0x10
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: stk_ovf
Expand Down
Loading

0 comments on commit d3c3011

Please sign in to comment.