From cdecc858b47cb1b27d2c38773d802b237c8ed36a Mon Sep 17 00:00:00 2001 From: yuyang-ok <96557710+yuyang-ok@users.noreply.github.com> Date: Wed, 28 Sep 2022 08:30:31 +0800 Subject: [PATCH] add riscv64 backend for cranelift. (#4271) Add a RISC-V 64 (`riscv64`, RV64GC) backend. Co-authored-by: yuyang <756445638@qq.com> Co-authored-by: Chris Fallin Co-authored-by: Afonso Bordado --- .github/workflows/main.yml | 9 + build.rs | 7 + ci/docker/riscv64gc-linux/Dockerfile | 7 + cranelift/codegen/Cargo.toml | 5 +- cranelift/codegen/build.rs | 12 + cranelift/codegen/meta/src/isa/mod.rs | 7 +- cranelift/codegen/meta/src/isa/riscv64.rs | 27 + cranelift/codegen/src/binemit/mod.rs | 8 + cranelift/codegen/src/isa/mod.rs | 4 + cranelift/codegen/src/isa/riscv64/abi.rs | 716 ++++ cranelift/codegen/src/isa/riscv64/inst.isle | 2084 ++++++++++++ .../codegen/src/isa/riscv64/inst/args.rs | 1972 +++++++++++ .../codegen/src/isa/riscv64/inst/emit.rs | 2920 +++++++++++++++++ .../src/isa/riscv64/inst/emit_tests.rs | 2279 +++++++++++++ .../codegen/src/isa/riscv64/inst/imms.rs | 218 ++ cranelift/codegen/src/isa/riscv64/inst/mod.rs | 1749 ++++++++++ .../codegen/src/isa/riscv64/inst/regs.rs | 220 ++ .../codegen/src/isa/riscv64/inst/unwind.rs | 2 + .../src/isa/riscv64/inst/unwind/systemv.rs | 173 + cranelift/codegen/src/isa/riscv64/lower.isle | 983 ++++++ cranelift/codegen/src/isa/riscv64/lower.rs | 62 + .../codegen/src/isa/riscv64/lower/isle.rs | 544 +++ .../isa/riscv64/lower/isle/generated_code.rs | 9 + .../codegen/src/isa/riscv64/lower_inst.rs | 36 + cranelift/codegen/src/isa/riscv64/mod.rs | 246 ++ cranelift/codegen/src/isa/riscv64/settings.rs | 8 + .../filetests/isa/riscv64/amodes.clif | 365 +++ .../filetests/isa/riscv64/arithmetic.clif | 509 +++ .../filetests/isa/riscv64/atomic-rmw.clif | 210 ++ .../filetests/isa/riscv64/atomic_load.clif | 36 + .../filetests/isa/riscv64/atomic_store.clif | 76 + .../filetests/isa/riscv64/bitops.clif | 929 ++++++ .../filetests/isa/riscv64/call-indirect.clif | 22 + .../filetests/filetests/isa/riscv64/call.clif | 424 +++ .../filetests/isa/riscv64/condbr.clif | 391 +++ .../filetests/isa/riscv64/condops.clif | 86 + .../filetests/isa/riscv64/constants.clif | 328 ++ .../filetests/isa/riscv64/extend-op.clif | 119 + .../filetests/isa/riscv64/fcvt-small.clif | 84 + .../filetests/isa/riscv64/float.clif | 576 ++++ .../filetests/isa/riscv64/heap-addr.clif | 53 + .../isa/riscv64/iconst-icmp-small.clif | 24 + .../filetests/isa/riscv64/multivalue-ret.clif | 17 + .../isa/riscv64/narrow-arithmetic.clif | 58 + .../filetests/isa/riscv64/prologue.clif | 279 ++ .../filetests/isa/riscv64/reduce.clif | 40 + .../filetests/isa/riscv64/reftypes.clif | 103 + .../filetests/isa/riscv64/shift-op.clif | 28 + .../filetests/isa/riscv64/shift-rotate.clif | 451 +++ .../filetests/isa/riscv64/stack-limit.clif | 206 ++ .../filetests/isa/riscv64/stack.clif | 630 ++++ .../filetests/isa/riscv64/symbol-value.clif | 16 + .../filetests/isa/riscv64/traps.clif | 36 + .../isa/riscv64/uextend-sextend.clif | 124 + .../filetests/filetests/runtests/alias.clif | 1 + .../filetests/runtests/arithmetic.clif | 1 + .../runtests/atomic-cas-subword-little.clif | 1 + .../filetests/runtests/atomic-cas.clif | 3 +- .../filetests/runtests/atomic-rmw-little.clif | 1 + .../runtests/atomic-rmw-subword-little.clif | 1 + .../filetests/filetests/runtests/bextend.clif | 1 + .../filetests/filetests/runtests/bint.clif | 1 + .../filetests/filetests/runtests/bitops.clif | 1 + .../filetests/filetests/runtests/bitrev.clif | 1 + 
.../filetests/filetests/runtests/bmask.clif | 1 + .../filetests/filetests/runtests/br.clif | 1 + .../filetests/filetests/runtests/br_icmp.clif | 2 +- .../filetests/runtests/br_table.clif | 3 +- .../filetests/filetests/runtests/breduce.clif | 1 + .../filetests/filetests/runtests/ceil.clif | 1 + .../filetests/filetests/runtests/cls.clif | 1 + .../filetests/filetests/runtests/clz.clif | 1 + .../filetests/filetests/runtests/const.clif | 1 + .../filetests/runtests/conversion.clif | 1 + .../runtests/conversions-load-store.clif | 1 + .../filetests/filetests/runtests/ctz.clif | 1 + .../filetests/runtests/div-checks.clif | 2 + .../filetests/filetests/runtests/extend.clif | 1 + .../filetests/filetests/runtests/fabs.clif | 1 + .../filetests/filetests/runtests/fadd.clif | 1 + .../filetests/filetests/runtests/fcmp-eq.clif | 1 + .../filetests/filetests/runtests/fcmp-ge.clif | 1 + .../filetests/filetests/runtests/fcmp-gt.clif | 1 + .../filetests/filetests/runtests/fcmp-le.clif | 1 + .../filetests/filetests/runtests/fcmp-lt.clif | 1 + .../filetests/filetests/runtests/fcmp-ne.clif | 1 + .../filetests/runtests/fcmp-one.clif | 1 + .../filetests/runtests/fcmp-ord.clif | 1 + .../filetests/runtests/fcmp-ueq.clif | 1 + .../filetests/runtests/fcmp-uge.clif | 1 + .../filetests/runtests/fcmp-ugt.clif | 1 + .../filetests/runtests/fcmp-ule.clif | 1 + .../filetests/runtests/fcmp-ult.clif | 1 + .../filetests/runtests/fcmp-uno.clif | 2 + .../filetests/runtests/fcopysign.clif | 1 + .../filetests/filetests/runtests/fdiv.clif | 1 + .../filetests/filetests/runtests/floor.clif | 1 + .../filetests/filetests/runtests/fma.clif | 3 +- .../filetests/runtests/fmax-pseudo.clif | 1 + .../filetests/filetests/runtests/fmax.clif | 1 + .../filetests/runtests/fmin-pseudo.clif | 1 + .../filetests/filetests/runtests/fmin.clif | 1 + .../filetests/filetests/runtests/fmul.clif | 1 + .../filetests/filetests/runtests/fneg.clif | 1 + .../filetests/filetests/runtests/fsub.clif | 1 + .../filetests/runtests/global_value.clif | 1 + .../filetests/filetests/runtests/heap.clif | 2 +- .../filetests/runtests/i128-arithmetic.clif | 1 + .../filetests/runtests/i128-bandnot.clif | 1 + .../filetests/runtests/i128-bextend.clif | 1 + .../filetests/runtests/i128-bint.clif | 1 + .../filetests/runtests/i128-bitops-count.clif | 1 + .../filetests/runtests/i128-bitops.clif | 2 + .../filetests/runtests/i128-bitrev.clif | 1 + .../filetests/runtests/i128-bmask.clif | 1 + .../filetests/runtests/i128-bornot.clif | 1 + .../filetests/filetests/runtests/i128-br.clif | 2 +- .../filetests/runtests/i128-breduce.clif | 1 + .../filetests/runtests/i128-bricmp.clif | 1 + .../filetests/runtests/i128-bxornot.clif | 1 + .../filetests/runtests/i128-cls.clif | 1 + .../filetests/runtests/i128-concat-split.clif | 2 + .../filetests/runtests/i128-const.clif | 1 + .../filetests/runtests/i128-extend.clif | 1 + .../filetests/runtests/i128-icmp.clif | 1 + .../filetests/runtests/i128-ireduce.clif | 1 + .../filetests/runtests/i128-load-store.clif | 1 + .../filetests/runtests/i128-rotate.clif | 1 + .../filetests/runtests/i128-select.clif | 1 + .../filetests/runtests/i128-shifts.clif | 2 +- .../filetests/filetests/runtests/iabs.clif | 1 + .../filetests/runtests/icmp-eq-imm.clif | 1 + .../filetests/filetests/runtests/icmp-eq.clif | 1 + .../filetests/filetests/runtests/icmp-ne.clif | 1 + .../filetests/runtests/icmp-sge.clif | 1 + .../filetests/runtests/icmp-sgt.clif | 1 + .../filetests/runtests/icmp-sle.clif | 1 + .../filetests/runtests/icmp-slt.clif | 1 + .../filetests/runtests/icmp-uge.clif | 1 + 
.../filetests/runtests/icmp-ugt.clif | 1 + .../filetests/runtests/icmp-ule.clif | 1 + .../filetests/filetests/runtests/icmp.clif | 1 + .../filetests/runtests/integer-minmax.clif | 2 + .../filetests/filetests/runtests/ireduce.clif | 1 + .../filetests/runtests/load-op-store.clif | 2 + .../filetests/filetests/runtests/nearest.clif | 1 + .../filetests/runtests/popcnt-interpret.clif | 8 + .../filetests/filetests/runtests/popcnt.clif | 7 +- .../filetests/filetests/runtests/rotl.clif | 2 +- .../filetests/filetests/runtests/rotr.clif | 1 + .../filetests/filetests/runtests/select.clif | 1 + .../filetests/filetests/runtests/shifts.clif | 2 +- .../filetests/runtests/smulhi-aarch64.clif | 1 + .../filetests/filetests/runtests/smulhi.clif | 2 + .../filetests/runtests/spill-reload.clif | 1 + .../filetests/filetests/runtests/sqrt.clif | 1 + .../filetests/filetests/runtests/srem.clif | 4 +- .../filetests/runtests/stack-addr-64.clif | 2 +- .../filetests/filetests/runtests/stack.clif | 1 + .../filetests/runtests/table_addr.clif | 1 + .../filetests/filetests/runtests/trunc.clif | 1 + .../filetests/filetests/runtests/umulhi.clif | 1 + .../filetests/filetests/runtests/urem.clif | 1 + cranelift/filetests/src/test_run.rs | 41 +- cranelift/native/Cargo.toml | 1 + cranelift/object/src/backend.rs | 1 + crates/environ/src/compilation.rs | 1 + crates/fiber/src/unix.rs | 4 +- crates/fiber/src/unix/riscv64.rs | 157 + crates/fuzzing/Cargo.toml | 4 +- crates/fuzzing/src/oracles.rs | 2 +- crates/fuzzing/src/oracles/engine.rs | 4 +- crates/jit/src/debug.rs | 1 + crates/runtime/src/trampolines.rs | 3 + crates/runtime/src/trampolines/riscv64.rs | 117 + crates/runtime/src/traphandlers/backtrace.rs | 3 + .../src/traphandlers/backtrace/riscv64.rs | 21 + crates/runtime/src/traphandlers/unix.rs | 9 +- crates/wasmtime/src/engine.rs | 11 + tests/all/memory.rs | 2 +- tests/host_segfault.rs | 4 + tests/spec_testsuite | 2 +- 182 files changed, 21024 insertions(+), 36 deletions(-) create mode 100644 ci/docker/riscv64gc-linux/Dockerfile create mode 100644 cranelift/codegen/meta/src/isa/riscv64.rs create mode 100644 cranelift/codegen/src/isa/riscv64/abi.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst.isle create mode 100644 cranelift/codegen/src/isa/riscv64/inst/args.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/emit.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/imms.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/mod.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/regs.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/unwind.rs create mode 100644 cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs create mode 100644 cranelift/codegen/src/isa/riscv64/lower.isle create mode 100644 cranelift/codegen/src/isa/riscv64/lower.rs create mode 100644 cranelift/codegen/src/isa/riscv64/lower/isle.rs create mode 100644 cranelift/codegen/src/isa/riscv64/lower/isle/generated_code.rs create mode 100644 cranelift/codegen/src/isa/riscv64/lower_inst.rs create mode 100644 cranelift/codegen/src/isa/riscv64/mod.rs create mode 100644 cranelift/codegen/src/isa/riscv64/settings.rs create mode 100644 cranelift/filetests/filetests/isa/riscv64/amodes.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/arithmetic.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/atomic-rmw.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/atomic_load.clif create mode 100644 
cranelift/filetests/filetests/isa/riscv64/atomic_store.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/bitops.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/call-indirect.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/call.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/condbr.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/condops.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/constants.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/extend-op.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/float.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/heap-addr.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/multivalue-ret.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/prologue.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/reduce.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/reftypes.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/shift-op.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/stack-limit.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/stack.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/symbol-value.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/traps.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/uextend-sextend.clif create mode 100644 crates/fiber/src/unix/riscv64.rs create mode 100644 crates/runtime/src/trampolines/riscv64.rs create mode 100644 crates/runtime/src/traphandlers/backtrace/riscv64.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fded70ec4622..6f4fab40829a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -233,6 +233,12 @@ jobs: gcc: s390x-linux-gnu-gcc qemu: qemu-s390x -L /usr/s390x-linux-gnu qemu_target: s390x-linux-user + - os: ubuntu-latest + target: riscv64gc-unknown-linux-gnu + gcc_package: gcc-riscv64-linux-gnu + gcc: riscv64-linux-gnu-gcc + qemu: qemu-riscv64 -L /usr/riscv64-linux-gnu + qemu_target: riscv64-linux-user steps: - uses: actions/checkout@v2 with: @@ -401,6 +407,9 @@ jobs: - build: s390x-linux os: ubuntu-latest target: s390x-unknown-linux-gnu + - build: riscv64gc-linux + os: ubuntu-latest + target: riscv64gc-unknown-linux-gnu steps: - uses: actions/checkout@v2 with: diff --git a/build.rs b/build.rs index d44b8c0c0e2d..4baa936c3277 100644 --- a/build.rs +++ b/build.rs @@ -172,6 +172,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { // FIXME: These tests fail under qemu due to a qemu bug. (_, "simd_f32x4_pmin_pmax") if platform_is_s390x() => return true, (_, "simd_f64x2_pmin_pmax") if platform_is_s390x() => return true, + // riscv64 backend does not yet have a fully complete SIMD backend. 
+ ("simd", _) if platform_is_riscv64() => return true, + ("memory64", "simd") if platform_is_riscv64() => return true, _ => {} }, _ => panic!("unrecognized strategy"), @@ -183,3 +186,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { fn platform_is_s390x() -> bool { env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "s390x" } + +fn platform_is_riscv64() -> bool { + env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "riscv64" +} diff --git a/ci/docker/riscv64gc-linux/Dockerfile b/ci/docker/riscv64gc-linux/Dockerfile new file mode 100644 index 000000000000..522867a67cb7 --- /dev/null +++ b/ci/docker/riscv64gc-linux/Dockerfile @@ -0,0 +1,7 @@ +FROM ubuntu:22.04 + +RUN apt-get update -y && apt-get install -y gcc gcc-riscv64-linux-gnu ca-certificates + +ENV PATH=$PATH:/rust/bin +ENV CARGO_BUILD_TARGET=riscv64gc-unknown-linux-gnu +ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER=riscv64-linux-gnu-gcc diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index ec113a194e66..aca0841738c0 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -68,7 +68,7 @@ unwind = ["gimli"] x86 = [] arm64 = [] s390x = [] - +riscv64 = [] # Stub feature that does nothing, for Cargo-features compatibility: the new # backend is the default now. experimental_x64 = [] @@ -77,7 +77,8 @@ experimental_x64 = [] all-arch = [ "x86", "arm64", - "s390x" + "s390x", + "riscv64" ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 4960b0c68c02..f81950277ed0 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -187,6 +187,8 @@ fn get_isle_compilations( let src_isa_s390x = make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("s390x")); + let src_isa_risc_v = + make_isle_source_path_relative(&cur_dir, crate_dir.join("src").join("isa").join("riscv64")); // This is a set of ISLE compilation units. // // The format of each entry is: @@ -234,6 +236,16 @@ fn get_isle_compilations( ], untracked_inputs: vec![clif_isle.clone()], }, + // The risc-v instruction selector. + IsleCompilation { + output: out_dir.join("isle_riscv64.rs"), + inputs: vec![ + prelude_isle.clone(), + src_isa_risc_v.join("inst.isle"), + src_isa_risc_v.join("lower.isle"), + ], + untracked_inputs: vec![clif_isle.clone()], + }, ], }) } diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index 6411932b16ad..4d77f9268ddf 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -4,6 +4,7 @@ use crate::shared::Definitions as SharedDefinitions; use std::fmt; mod arm64; +mod riscv64; mod s390x; pub(crate) mod x86; @@ -13,6 +14,7 @@ pub enum Isa { X86, Arm64, S390x, + Riscv64, } impl Isa { @@ -30,13 +32,14 @@ impl Isa { "aarch64" => Some(Isa::Arm64), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), + "riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64), _ => None, } } /// Returns all supported isa targets. 
pub fn all() -> &'static [Isa] { - &[Isa::X86, Isa::Arm64, Isa::S390x] + &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64] } } @@ -47,6 +50,7 @@ impl fmt::Display for Isa { Isa::X86 => write!(f, "x86"), Isa::Arm64 => write!(f, "arm64"), Isa::S390x => write!(f, "s390x"), + Isa::Riscv64 => write!(f, "riscv64"), } } } @@ -57,6 +61,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec x86::define(shared_defs), Isa::Arm64 => arm64::define(shared_defs), Isa::S390x => s390x::define(shared_defs), + Isa::Riscv64 => riscv64::define(shared_defs), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/riscv64.rs b/cranelift/codegen/meta/src/isa/riscv64.rs new file mode 100644 index 000000000000..7f392efd023d --- /dev/null +++ b/cranelift/codegen/meta/src/isa/riscv64.rs @@ -0,0 +1,27 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let mut setting = SettingGroupBuilder::new("riscv64"); + + let _has_m = setting.add_bool("has_m", "has extension M?", "", false); + let _has_a = setting.add_bool("has_a", "has extension A?", "", false); + let _has_f = setting.add_bool("has_f", "has extension F?", "", false); + let _has_d = setting.add_bool("has_d", "has extension D?", "", false); + let _has_v = setting.add_bool("has_v", "has extension V?", "", false); + let _has_b = setting.add_bool("has_b", "has extension B?", "", false); + let _has_c = setting.add_bool("has_c", "has extension C?", "", false); + let _has_zbkb = setting.add_bool("has_zbkb", "has extension zbkb?", "", false); + + let _has_zicsr = setting.add_bool("has_zicsr", "has extension zicsr?", "", false); + let _has_zifencei = setting.add_bool("has_zifencei", "has extension zifencei?", "", false); + + setting.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + TargetIsa::new("riscv64", settings) +} diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index 713d89bec811..96a12b4e23b9 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -66,6 +66,13 @@ pub enum Reloc { /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage) Aarch64TlsGdAddLo12Nc, + /// procedure call. 
+    /// `call symbol`; expands to the following assembly and relocation:
+    ///   auipc ra, 0
+    ///   jalr ra, ra, 0
+    RiscvCall,
+
     /// s390x TLS GD64 - 64-bit offset of tls_index for GD symbol in GOT
     S390xTlsGd64,
     /// s390x TLS GDCall - marker to enable optimization of TLS calls
@@ -87,6 +94,7 @@ impl fmt::Display for Reloc {
             Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"),
             Self::X86SecRel => write!(f, "SecRel"),
             Self::Arm32Call | Self::Arm64Call => write!(f, "Call"),
+            Self::RiscvCall => write!(f, "RiscvCall"),
             Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"),
             Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index e513987c6d59..259d2891b9cb 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -66,6 +66,9 @@ pub mod x64;
 #[cfg(feature = "arm64")]
 pub(crate) mod aarch64;
 
+#[cfg(feature = "riscv64")]
+pub mod riscv64;
+
 #[cfg(feature = "s390x")]
 mod s390x;
 
@@ -97,6 +100,7 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
         }
         Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
         Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
+        Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple),
         _ => Err(LookupError::Unsupported),
     }
 }
diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs
new file mode 100644
index 000000000000..872f3656f91a
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/abi.rs
@@ -0,0 +1,716 @@
+//! Implementation of a standard Riscv64 ABI.
+
+use crate::ir;
+use crate::ir::types::*;
+
+use crate::ir::ExternalName;
+use crate::ir::MemFlags;
+use crate::isa;
+
+use crate::isa::riscv64::{inst::EmitState, inst::*};
+use crate::isa::CallConv;
+use crate::machinst::*;
+
+use crate::ir::types::I8;
+use crate::ir::LibCall;
+use crate::ir::Signature;
+use crate::isa::riscv64::settings::Flags as RiscvFlags;
+use crate::isa::unwind::UnwindInst;
+use crate::settings;
+use crate::CodegenError;
+use crate::CodegenResult;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc2::PRegSet;
+use regs::x_reg;
+
+use smallvec::{smallvec, SmallVec};
+
+/// Support for the Riscv64 ABI from the callee side (within a function body).
+pub(crate) type Riscv64Callee = Callee<Riscv64MachineDeps>;
+
+/// Support for the Riscv64 ABI from the caller side (at a callsite).
+pub(crate) type Riscv64ABICaller = Caller<Riscv64MachineDeps>;
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+/// Riscv64-specific ABI behavior. This struct just serves as an implementation
+/// point for the trait; it is never actually instantiated.
+pub struct Riscv64MachineDeps;
+
+impl IsaFlags for RiscvFlags {}
+
+impl ABIMachineSpec for Riscv64MachineDeps {
+    type I = Inst;
+    type F = RiscvFlags;
+
+    fn word_bits() -> u32 {
+        64
+    }
+
+    /// Return required stack alignment in bytes.
+    fn stack_align(_call_conv: isa::CallConv) -> u32 {
+        16
+    }
+
+    fn compute_arg_locs(
+        call_conv: isa::CallConv,
+        _flags: &settings::Flags,
+        params: &[ir::AbiParam],
+        args_or_rets: ArgsOrRets,
+        add_ret_area_ptr: bool,
+    ) -> CodegenResult<(ABIArgVec, i64, Option<usize>)> {
+        // All registers that can be used as parameters or rets;
+        // both the start and end bounds are inclusive.
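+        // E.g., under the standard RISC-V convention, integer arguments use
+        // x10..x17 (a0..a7) and float arguments use f10..f17 (fa0..fa7), so
+        // an (i64, f64, i64) signature is assigned a0, fa0, a1.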
+ let (x_start, x_end, f_start, f_end) = if args_or_rets == ArgsOrRets::Args { + (10, 17, 10, 17) + } else { + let end = if call_conv.extends_wasmtime() { 10 } else { 11 }; + (10, end, 10, end) + }; + let mut next_x_reg = x_start; + let mut next_f_reg = f_start; + // Stack space. + let mut next_stack: u64 = 0; + let mut ret = smallvec![]; + let mut return_one_register_used = false; + + for param in params { + if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { + let offset = next_stack; + assert!(size % 8 == 0, "StructArgument size is not properly aligned"); + next_stack += size as u64; + ret.push(ABIArg::StructArg { + pointer: None, + offset: offset as i64, + size: size as u64, + purpose: param.purpose, + }); + continue; + } + + // Find regclass(es) of the register(s) used to store a value of this type. + let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?; + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = + if (next_x_reg <= x_end) && *rc == RegClass::Int && !return_one_register_used { + let x = Some(x_reg(next_x_reg)); + if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() { + return_one_register_used = true; + } + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) + && *rc == RegClass::Float + && !return_one_register_used + { + let x = Some(f_reg(next_f_reg)); + if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() { + return_one_register_used = true; + } + next_f_reg += 1; + x + } else { + None + }; + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size. For the wasmtime ABI it differs from native + // ABIs in how multiple values are returned, so we take a + // leaf out of arm64's book by not rounding everything up to + // 8 bytes. For all ABI arguments, and other ABI returns, + // though, each slot takes a minimum of 8 bytes. + // + // Note that in all cases 16-byte stack alignment happens + // separately after all args. + let size = (reg_ty.bits() / 8) as u64; + let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() { + size + } else { + std::cmp::max(size, 8) + }; + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + slots.push(ABIArgSlot::Stack { + offset: next_stack as i64, + ty: *reg_ty, + extension: param.extension, + }); + next_stack += size; + } + } + ret.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + } + let pos: Option = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + ret.push(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + ret.push(arg); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + next_stack = align_to(next_stack, Self::stack_align(call_conv) as u64); + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + CodegenResult::Ok((ret, next_stack as i64, pos)) + } + + fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { + // lr fp. 
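+        // The prologue saves the return address (ra) and the old frame
+        // pointer, 8 bytes each, so the first stack argument lives at FP+16.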
+        16
+    }
+
+    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
+        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
+    }
+
+    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
+        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
+    }
+
+    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+        Inst::gen_move(to_reg, from_reg, ty)
+    }
+
+    fn gen_extend(
+        to_reg: Writable<Reg>,
+        from_reg: Reg,
+        signed: bool,
+        from_bits: u8,
+        to_bits: u8,
+    ) -> Inst {
+        assert!(from_bits < to_bits);
+        Inst::Extend {
+            rd: to_reg,
+            rn: from_reg,
+            signed,
+            from_bits,
+            to_bits,
+        }
+    }
+
+    fn get_ext_mode(
+        _call_conv: isa::CallConv,
+        specified: ir::ArgumentExtension,
+    ) -> ir::ArgumentExtension {
+        specified
+    }
+
+    fn gen_args(_isa_flags: &crate::isa::riscv64::settings::Flags, args: Vec<ArgPair>) -> Inst {
+        Inst::Args { args }
+    }
+
+    fn gen_ret(_setup_frame: bool, _isa_flags: &Self::F, rets: Vec<Reg>) -> Inst {
+        Inst::Ret { rets }
+    }
+
+    fn get_stacklimit_reg() -> Reg {
+        spilltmp_reg()
+    }
+
+    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        if let Some(imm12) = Imm12::maybe_from_u64(imm as u64) {
+            insts.push(Inst::AluRRImm12 {
+                alu_op: AluOPRRI::Addi,
+                rd: into_reg,
+                rs: from_reg,
+                imm12,
+            });
+        } else {
+            insts.extend(Inst::load_constant_u32(
+                writable_spilltmp_reg2(),
+                imm as u64,
+            ));
+            insts.push(Inst::AluRRR {
+                alu_op: AluOPRRR::Add,
+                rd: into_reg,
+                rs1: spilltmp_reg2(),
+                rs2: from_reg,
+            });
+        }
+        insts
+    }
+
+    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        insts.push(Inst::TrapIfC {
+            cc: IntCC::UnsignedLessThan,
+            rs1: stack_reg(),
+            rs2: limit_reg,
+            trap_code: ir::TrapCode::StackOverflow,
+        });
+        insts
+    }
+
+    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
+        Inst::LoadAddr {
+            rd: into_reg,
+            mem: mem.into(),
+        }
+    }
+
+    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
+        let mem = AMode::RegOffset(base, offset as i64, ty);
+        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
+    }
+
+    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
+        let mem = AMode::RegOffset(base, offset as i64, ty);
+        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
+    }
+
+    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        if amount == 0 {
+            return insts;
+        }
+        insts.push(Inst::AjustSp {
+            amount: amount as i64,
+        });
+        insts
+    }
+
+    fn gen_nominal_sp_adj(offset: i32) -> Inst {
+        Inst::VirtualSPOffsetAdj {
+            amount: offset as i64,
+        }
+    }
+
+    fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> {
+        // add sp,sp,-16   ;; alloc stack space for fp.
+        // sd ra,8(sp)     ;; save ra.
+        // sd fp,0(sp)     ;; store old fp.
+        // mv fp,sp        ;; set fp to sp.
+        let mut insts = SmallVec::new();
+        insts.push(Inst::AjustSp { amount: -16 });
+        insts.push(Self::gen_store_stack(
+            StackAMode::SPOffset(8, I64),
+            link_reg(),
+            I64,
+        ));
+        insts.push(Self::gen_store_stack(
+            StackAMode::SPOffset(0, I64),
+            fp_reg(),
+            I64,
+        ));
+        if flags.unwind_info() {
+            insts.push(Inst::Unwind {
+                inst: UnwindInst::PushFrameRegs {
+                    offset_upward_to_caller_sp: 16, // FP, LR
+                },
+            });
+        }
+        insts.push(Inst::Mov {
+            rd: writable_fp_reg(),
+            rm: stack_reg(),
+            ty: I64,
+        });
+        insts
+    }
+
+    /// Reverse of `gen_prologue_frame_setup`.
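+    /// In effect:
+    ///   ld ra,8(sp)     ;; restore return address.
+    ///   ld fp,0(sp)     ;; restore old frame pointer.
+    ///   addi sp,sp,16   ;; free the frame record.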
+    fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        insts.push(Self::gen_load_stack(
+            StackAMode::SPOffset(8, I64),
+            writable_link_reg(),
+            I64,
+        ));
+        insts.push(Self::gen_load_stack(
+            StackAMode::SPOffset(0, I64),
+            writable_fp_reg(),
+            I64,
+        ));
+        insts.push(Inst::AjustSp { amount: 16 });
+        insts
+    }
+
+    fn gen_probestack(frame_size: u32) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        insts.extend(Inst::load_constant_u32(writable_a0(), frame_size as u64));
+        insts.push(Inst::Call {
+            info: Box::new(CallInfo {
+                dest: ExternalName::LibCall(LibCall::Probestack),
+                uses: smallvec![CallArgPair {
+                    vreg: a0(),
+                    preg: a0(),
+                }],
+                defs: smallvec![],
+                clobbers: PRegSet::empty(),
+                opcode: Opcode::Call,
+                callee_callconv: CallConv::SystemV,
+                caller_callconv: CallConv::SystemV,
+            }),
+        });
+        insts
+    }
+
+    // Returns stack bytes used as well as instructions. Does not adjust
+    // nominal SP offset; abi_impl generic code will do that.
+    fn gen_clobber_save(
+        _call_conv: isa::CallConv,
+        setup_frame: bool,
+        flags: &settings::Flags,
+        clobbered_callee_saves: &[Writable<RealReg>],
+        fixed_frame_storage_size: u32,
+        _outgoing_args_size: u32,
+    ) -> (u64, SmallVec<[Inst; 16]>) {
+        let mut insts = SmallVec::new();
+        let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
+        // Adjust the stack pointer downward for clobbers and the function fixed
+        // frame (spillslots and storage slots).
+        let stack_size = fixed_frame_storage_size + clobbered_size;
+        if flags.unwind_info() && setup_frame {
+            // The *unwind* frame (but not the actual frame) starts at the
+            // clobbers, just below the saved FP/LR pair.
+            insts.push(Inst::Unwind {
+                inst: UnwindInst::DefineNewFrame {
+                    offset_downward_to_clobbers: clobbered_size,
+                    offset_upward_to_caller_sp: 16, // FP, LR
+                },
+            });
+        }
+        // Store each clobbered register in order at offsets from SP,
+        // placing them above the fixed frame slots.
+        if stack_size > 0 {
+            // Since we use a frame pointer, we don't need to emit
+            // `UnwindInst::StackAlloc` here.
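+            // E.g. with two clobbered registers (clobbered_size = 16), the
+            // first is stored at SP-8 and the second at SP-16; SP itself is
+            // only moved once, by the `AjustSp` below.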
+ let mut cur_offset = 8; + for reg in clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + }; + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: clobbered_size - cur_offset, + reg: r_reg, + }, + }); + } + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-(cur_offset as i64), ty), + real_reg_to_reg(reg.to_reg()), + ty, + )); + cur_offset += 8 + } + insts.push(Inst::AjustSp { + amount: -(stack_size as i64), + }); + } + (clobbered_size as u64, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + sig: &Signature, + _flags: &settings::Flags, + clobbers: &[Writable], + fixed_frame_storage_size: u32, + _outgoing_args_size: u32, + ) -> SmallVec<[Inst; 16]> { + let mut insts = SmallVec::new(); + let clobbered_callee_saves = + Self::get_clobbered_callee_saves(call_conv, _flags, sig, clobbers); + let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves); + if stack_size > 0 { + insts.push(Inst::AjustSp { + amount: stack_size as i64, + }); + } + let mut cur_offset = 8; + for reg in &clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + }; + insts.push(Self::gen_load_stack( + StackAMode::SPOffset(-cur_offset, ty), + Writable::from_reg(real_reg_to_reg(reg.to_reg())), + ty, + )); + cur_offset += 8 + } + insts + } + + fn gen_call( + dest: &CallDest, + uses: CallArgList, + defs: CallRetList, + clobbers: PRegSet, + opcode: ir::Opcode, + tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, + ) -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + match &dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(Inst::LoadExtName { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: tmp.to_reg(), + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + clobbers, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), + } + insts + } + + fn gen_memcpy( + call_conv: isa::CallConv, + dst: Reg, + src: Reg, + tmp: Writable, + _tmp2: Writable, + size: usize, + ) -> SmallVec<[Self::I; 8]> { + let mut insts = SmallVec::new(); + let arg0 = Writable::from_reg(x_reg(10)); + let arg1 = Writable::from_reg(x_reg(11)); + let arg2 = Writable::from_reg(x_reg(12)); + insts.extend(Inst::load_constant_u64(tmp, size as u64).into_iter()); + insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::LibCall(LibCall::Memcpy), + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], + defs: smallvec![], + clobbers: Self::get_regs_clobbered_by_call(call_conv), + opcode: Opcode::Call, + caller_callconv: call_conv, + callee_callconv: call_conv, + }), + }); + insts + } + + fn 
get_number_of_spillslots_for_value(rc: RegClass, _target_vector_bytes: u32) -> u32 { + // We allocate in terms of 8-byte slots. + match rc { + RegClass::Int => 1, + RegClass::Float => 1, + } + } + + /// Get the current virtual-SP offset from an instruction-emission state. + fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { + s.virtual_sp_offset + } + + /// Get the nominal-SP-to-FP offset from an instruction-emission state. + fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { + s.nominal_sp_to_fp + } + + fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet { + let mut v = PRegSet::empty(); + for (k, need_save) in CALLER_SAVE_X_REG.iter().enumerate() { + if !*need_save { + continue; + } + v.add(px_reg(k)); + } + for (k, need_save) in CALLER_SAVE_F_REG.iter().enumerate() { + if !*need_save { + continue; + } + v.add(pf_reg(k)); + } + v + } + + fn get_clobbered_callee_saves( + call_conv: isa::CallConv, + _flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + ) -> Vec> { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg())) + .collect(); + + regs.sort(); + regs + } + + fn is_frame_setup_needed( + is_leaf: bool, + stack_args_size: u32, + num_clobbered_callee_saves: usize, + fixed_frame_storage_size: u32, + ) -> bool { + !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || stack_args_size > 0 + || num_clobbered_callee_saves > 0 + || fixed_frame_storage_size > 0 + } + + fn gen_inline_probestack(frame_size: u32, guard_size: u32) -> SmallInstVec { + // Unroll at most n consecutive probes, before falling back to using a loop + const PROBE_MAX_UNROLL: u32 = 3; + // Number of probes that we need to perform + let probe_count = align_to(frame_size, guard_size) / guard_size; + + if probe_count <= PROBE_MAX_UNROLL { + Self::gen_probestack_unroll(guard_size, probe_count) + } else { + Self::gen_probestack_loop(guard_size, probe_count) + } + } +} + +const CALLER_SAVE_X_REG: [bool; 32] = [ + false, true, false, false, false, true, true, true, // 0-7 + false, false, true, true, true, true, true, true, // 8-15 + true, true, false, false, false, false, false, false, // 16-23 + false, false, false, false, true, true, true, true, // 24-31 +]; +const CALLEE_SAVE_X_REG: [bool; 32] = [ + false, false, true, false, false, false, false, false, // 0-7 + true, true, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; +const CALLER_SAVE_F_REG: [bool; 32] = [ + true, true, true, true, true, true, true, true, // 0-7 + false, true, true, true, true, true, true, true, // 8-15 + true, true, false, false, false, false, false, false, // 16-23 + false, false, false, false, true, true, true, true, // 24-31 +]; +const CALLEE_SAVE_F_REG: [bool; 32] = [ + false, false, false, false, false, false, false, false, // 0-7 + true, false, false, false, false, false, false, false, // 8-15 + false, false, true, true, true, true, true, true, // 16-23 + true, true, true, true, false, false, false, false, // 24-31 +]; + +/// This should be the registers that must be saved by callee. 
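+/// E.g. in the integer class this is sp (x2), s0/fp (x8), s1 (x9) and
+/// s2..s11 (x18..x27), matching the `CALLEE_SAVE_X_REG` table above.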
+#[inline] +fn is_reg_saved_in_prologue(_conv: CallConv, reg: RealReg) -> bool { + if reg.class() == RegClass::Int { + CALLEE_SAVE_X_REG[reg.hw_enc() as usize] + } else { + CALLEE_SAVE_F_REG[reg.hw_enc() as usize] + } +} + +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { + let mut clobbered_size = 0; + for reg in clobbers { + match reg.to_reg().class() { + RegClass::Int => { + clobbered_size += 8; + } + RegClass::Float => { + clobbered_size += 8; + } + } + } + align_to(clobbered_size, 16) +} + +impl Riscv64MachineDeps { + fn gen_probestack_unroll(guard_size: u32, probe_count: u32) -> SmallInstVec { + let mut insts = SmallVec::with_capacity(probe_count as usize); + for i in 0..probe_count { + let offset = (guard_size * (i + 1)) as i64; + insts.push(Self::gen_store_stack( + StackAMode::SPOffset(-offset, I8), + zero_reg(), + I32, + )); + } + insts + } + fn gen_probestack_loop(guard_size: u32, probe_count: u32) -> SmallInstVec { + smallvec![Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: Writable::from_reg(x_reg(28)), // t3 + }] + } +} diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle new file mode 100644 index 000000000000..2f1b06ee696c --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -0,0 +1,2084 @@ +;; Instruction formats. +(type MInst + (enum + ;; A no-op of zero size. + (Nop0) + (Nop4) + + ;; load immediate + (Lui + (rd WritableReg) + (imm Imm20)) + + (Auipc + (rd WritableReg) + (imm Imm20)) + + ;; An ALU operation with one register sources and a register destination. + (FpuRR + (alu_op FpuOPRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs Reg)) + + + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op AluOPRRR) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with two register sources and a register destination. + (FpuRRR + (alu_op FpuOPRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg)) + + ;; An ALU operation with three register sources and a register destination. + (FpuRRRR + (alu_op FpuOPRRRR) + (frm OptionFloatRoundingMode) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (rs3 Reg)) + + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op AluOPRRI) + (rd WritableReg) + (rs Reg) + (imm12 Imm12)) + + ;; An load + (Load + (rd WritableReg) + (op LoadOP) + (flags MemFlags) + (from AMode)) + ;; An Store + (Store + (to AMode) + (op StoreOP) + (flags MemFlags) + (src Reg)) + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args + (args VecArgPair)) + + (Ret (rets VecReg)) + + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) + + (AjustSp + (amount i64)) + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + (TrapIf + (test Reg) + (trap_code TrapCode)) + + ;; use a simple compare to decide to cause trap or not. + (TrapIfC + (rs1 Reg) + (rs2 Reg) + (cc IntCC) + (trap_code TrapCode)) + (TrapFf + (cc FloatCC) + (x Reg) + (y Reg) + (ty Type) + (tmp WritableReg) + (trap_code TrapCode)) + + (Jal + ;; (rd WritableReg) don't use + (dest BranchTarget)) + + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind IntegerCompare)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. 
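+    ;; (e.g. `gen_get_stack_addr` in abi.rs uses this to materialize
+    ;; stack-slot addresses.)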
+ (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (amount i64)) + + ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov + (rd WritableReg) + (rm Reg) + (ty Type)) + + (Fence + (pred FenceReq) + (succ FenceReq)) + + (FenceI) + + (ECall) + + (EBreak) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + ;; a jump and link register operation + (Jalr + ;;Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0. + (rd WritableReg) + (base Reg) + (offset Imm12)) + + ;; atomic operations. + (Atomic + (op AtomicOP) + (rd WritableReg) + (addr Reg) + (src Reg) + (amo AMO)) + ;; an atomic store + (AtomicStore + (src Reg) + (ty Type) + (p Reg)) + ;; an atomic load. + (AtomicLoad + (rd WritableReg) + (ty Type) + (p Reg)) + + ;; an atomic nand need using loop to implement. + (AtomicRmwLoop + (offset Reg) + (op AtomicRmwOp) + (dst WritableReg) + (ty Type) + (p Reg) + (x Reg) + (t0 WritableReg)) + + ;; a float compare + (Fcmp + (cc FloatCC) + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + + ;; select x or y base on condition + (Select + (dst VecWritableReg) + (ty Type) + (condition Reg) + (x ValueRegs) + (y ValueRegs)) + + (ReferenceCheck + (rd WritableReg) + (op ReferenceCheckOP) + (x Reg)) + + (BrTableCheck + (index Reg) + (targets_len i32) + (default_ BranchTarget)) + (BrTable + (index Reg) + (tmp1 WritableReg) + (targets VecBranchTarget)) + + ;; atomic compare and set operation + (AtomicCas + (offset Reg) + (t0 WritableReg) + (dst WritableReg) + (e Reg) + (addr Reg) + (v Reg) + (ty Type)) + ;; select x or y base on op_code + (IntSelect + (op IntSelectOP) + (dst VecWritableReg) + (x ValueRegs) + (y ValueRegs) + (ty Type)) + ;; risc-v csr operations. + (Csr + (csr_op CsrOP) + (rd WritableReg) + (rs OptionReg) + (imm OptionUimm5) + (csr CsrAddress)) + ;; an integer compare. + (Icmp + (cc IntCC) + (rd WritableReg) + (a ValueRegs) + (b ValueRegs) + (ty Type)) + ;; select a reg base on condition. + ;; very useful because in lowering stage we can not have condition branch. + (SelectReg + (rd WritableReg) + (rs1 Reg) + (rs2 Reg) + (condition IntegerCompare)) + ;; + (FcvtToInt + (is_sat bool) + (rd WritableReg) + (tmp WritableReg) ;; a float register to load bounds. + (rs Reg) + (is_signed bool) + (in_type Type) + (out_type Type)) + (SelectIf + (if_spectre_guard bool) + (rd VecWritableReg) + (test Reg) + (x ValueRegs) + (y ValueRegs)) + (RawData (data VecU8)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)) + ;;; + (FloatRound + (op FloatRoundOP) + (rd WritableReg) + (int_tmp WritableReg) + (f_tmp WritableReg) + (rs Reg) + (ty Type)) + ;;;; FMax + (FloatSelect + (op FloatSelectOP) + (rd WritableReg) + ;; a integer register + (tmp WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + (FloatSelectPseudo + (op FloatSelectOP) + (rd WritableReg) + ;; a integer register + (tmp WritableReg) + (rs1 Reg) + (rs2 Reg) + (ty Type)) + + ;; popcnt if target doesn't support extension B + ;; use iteration to implement. 
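+    ;; `sum` accumulates the result, `step` counts the remaining bits and
+    ;; `tmp` holds a scratch bit mask while iterating over `rs`.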
+ (Popcnt + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + + ;;; counting leading or trailing zeros. + (Cltz + ;; leading or trailing. + (leading bool) + (sum WritableReg) + (step WritableReg) + (tmp WritableReg) + (rs Reg) + (ty Type)) + ;; Byte-reverse register + (Rev8 + (rs Reg) + (step WritableReg) + (tmp WritableReg) + (rd WritableReg)) + ;; + (Brev8 + (rs Reg) + (ty Type) + (step WritableReg) + (tmp WritableReg) + (tmp2 WritableReg) + (rd WritableReg)) + (StackProbeLoop + (guard_size u32) + (probe_count u32) + (tmp WritableReg)) +)) + + +(type FloatSelectOP (enum + (Max) + (Min) +)) + +(type FloatRoundOP (enum + (Nearest) + (Ceil) + (Floor) + (Trunc) +)) + +(type CsrOP(enum + (Csrrw) + (Csrrs) + (Csrrc) + (Csrrwi) + (Csrrsi) + (Csrrci) +)) + +(type IntSelectOP (enum + (Imax) + (Umax) + (Imin) + (Umin) +)) + +(type ReferenceCheckOP(enum + (IsNull) + (IsInvalid) +)) + +(type AtomicOP (enum + (LrW) + (ScW) + (AmoswapW) + (AmoaddW) + (AmoxorW) + (AmoandW) + (AmoorW) + (AmominW) + (AmomaxW) + (AmominuW) + (AmomaxuW) + (LrD) + (ScD) + (AmoswapD) + (AmoaddD) + (AmoxorD) + (AmoandD) + (AmoorD) + (AmominD) + (AmomaxD) + (AmominuD) + (AmomaxuD) +)) + +(type FpuOPRRRR(enum + ;; float32 + (FmaddS) + (FmsubS) + (FnmsubS) + (FnmaddS) + ;; float64 + (FmaddD) + (FmsubD) + (FnmsubD) + (FnmaddD) +)) + +(type FClassResult(enum + ;;0 rs1 is −∞. + (NegInfinite) + ;; 1 rs1 is a negative normal number. + (NegNormal) + ;; 2 rs1 is a negative subnormal number. + (NegSubNormal) + ;; 3 rs1 is −0. + (NegZero) + ;; 4 rs1 is +0. + (PosZero) + ;; 5 rs1 is a positive subnormal number. + (PosSubNormal) + ;; 6 rs1 is a positive normal number. + (PosNormal) + ;; 7 rs1 is +∞. + (PosInfinite) + ;; 8 rs1 is a signaling NaN. + (SNaN) + ;; 9 rs1 is a quiet NaN. 
+ (QNaN) +)) + +(type FpuOPRR (enum + ;; RV32F Standard Extension + (FsqrtS) + (FcvtWS) + (FcvtWuS) + (FmvXW) + (FclassS) + (FcvtSw) + (FcvtSwU) + (FmvWX) + + + ;; RV64F Standard Extension (in addition to RV32F) + (FcvtLS) + (FcvtLuS) + (FcvtSL) + (FcvtSLU) + + + ;; RV64D Standard Extension (in addition to RV32D) + (FcvtLD) + (FcvtLuD) + (FmvXD) + (FcvtDL) + (FcvtDLu) + (FmvDX) + + ;; RV32D Standard Extension + (FsqrtD) + (FcvtSD) + (FcvtDS) + (FclassD) + (FcvtWD) + (FcvtWuD) + (FcvtDW) + (FcvtDWU) + ;; bitmapip + +)) + +(type LoadOP (enum + (Lb) + (Lh) + (Lw) + (Lbu) + (Lhu) + (Lwu) + (Ld) + (Flw) + (Fld) +)) + +(type StoreOP(enum + (Sb) + (Sh) + (Sw) + (Sd) + (Fsw) + (Fsd) +)) + +(type AluOPRRR (enum + ;; base set + (Add) + (Sub) + (Sll) + (Slt) + (SltU) + (Sgt) + (Sgtu) + (Xor) + (Srl) + (Sra) + (Or) + (And) + + ;; RV64I Base Instruction Set (in addition to RV32I) + (Addw) + (Subw) + (Sllw) + (Srlw) + (Sraw) + + + ;;RV32M Standard Extension + (Mul) + (Mulh) + (Mulhsu) + (Mulhu) + (Div) + (DivU) + (Rem) + (RemU) + + ;; RV64M Standard Extension (in addition to RV32M) + + (Mulw) + (Divw) + (Divuw) + (Remw) + (Remuw) + + ;; bitmapip + (Adduw) + (Andn) + (Bclr) + (Bext) + (Binv) + (Bset) + (Clmul) + (Clmulh) + (Clmulr) + (Max) + (Maxu) + (Min) + (Minu) + (Orn) + (Rol) + (Rolw) + (Ror) + (Rorw) + (Sh1add) + (Sh1adduw) + (Sh2add) + (Sh2adduw) + (Sh3add) + (Sh3adduw) + (Xnor) +)) + + +(type FpuOPRRR (enum + ;; RV32F Standard Extension + (FaddS) + (FsubS) + (FmulS) + (FdivS) + + (FsgnjS) + (FsgnjnS) + (FsgnjxS) + (FminS) + (FmaxS) + (FeqS) + (FltS) + (FleS) + + ;; RV32D Standard Extension + (FaddD) + (FsubD) + (FmulD) + (FdivD) + (FsgnjD) + (FsgnjnD) + (FsgnjxD) + (FminD) + (FmaxD) + (FeqD) + (FltD) + (FleD) +)) + + + +(type AluOPRRI (enum + (Addi) + (Slti) + (SltiU) + (Xori) + (Ori) + (Andi) + (Slli) + (Srli) + (Srai) + (Addiw) + (Slliw) + (SrliW) + (Sraiw) + (Bclri) + (Bexti) + (Binvi) + (Bseti) + (Rori) + (Roriw) + (SlliUw) + (Clz) + (Clzw) + (Cpop) + (Cpopw) + (Ctz) + (Ctzw) + (Rev8) + (Sextb) + (Sexth) + (Zexth) + (Orcb) + (Brev8) +)) + + +(type FRM (enum + ;; Round to Nearest, ties to Even + (RNE) + ;; Round towards Zero + (RTZ) + ;; Round Down (towards −∞) + (RDN) + ;; Round Up (towards +∞) + (RUP) + ;; Round to Nearest, ties to Max Magnitude + (RMM) + ;; In instruction’s rm field, selects dynamic rounding mode; + ;;In Rounding Mode register, Invalid. + (Fcsr) +)) + +(type FFlagsException (enum + ;; Invalid Operation + (NV) + ;; Divide by Zero + (DZ) + ;; Overflow + (OF) + ;; Underflow + (UF) + ;; Inexact + (NX) +)) + +;;;; input output read write +;;;; SI SO SR SW +;;;; PI PO PR PW +;;;; lowest four bit are used. +(type FenceReq (primitive u8)) + +(type FenceFm (enum + (None) + (Tso) +)) + + +(type VecBranchTarget (primitive VecBranchTarget)) +(type BoxCallInfo (primitive BoxCallInfo)) +(type BoxCallIndInfo (primitive BoxCallIndInfo)) +(type IntegerCompare (primitive IntegerCompare)) +(type AMode (primitive AMode)) +(type OptionReg (primitive OptionReg)) +(type OptionImm12 (primitive OptionImm12)) +(type OptionUimm5 (primitive OptionUimm5)) +(type Imm12 (primitive Imm12)) +(type UImm5 (primitive UImm5)) +(type Imm20 (primitive Imm20)) +(type Imm3 (primitive Imm3)) +(type BranchTarget (primitive BranchTarget)) +(type CsrAddress (primitive CsrAddress)) +(type OptionFloatRoundingMode (primitive OptionFloatRoundingMode)) +(type VecU8 (primitive VecU8)) +(type AMO (primitive AMO)) +(type VecMachLabel extern (enum)) + +;; Helper for creating the zero register. 
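+;; On RISC-V this is x0, which always reads as zero.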
+(decl zero_reg () Reg)
+(extern constructor zero_reg zero_reg)
+
+(decl gen_float_round (FloatRoundOP Reg Type) Reg)
+(rule
+  (gen_float_round op rs ty)
+  (let
+    ((rd WritableReg (temp_writable_reg ty))
+     (tmp WritableReg (temp_writable_reg $I64))
+     (tmp2 WritableReg (temp_writable_reg $F64))
+     (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty))))
+    (writable_reg_to_reg rd)))
+
+(decl gen_float_select_pseudo (FloatSelectOP Reg Reg Type) Reg)
+(rule
+  (gen_float_select_pseudo op x y ty)
+  (let
+    ((rd WritableReg (temp_writable_reg ty))
+     (tmp WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.FloatSelectPseudo op rd tmp x y ty))))
+    (writable_reg_to_reg rd)))
+
+(decl gen_float_select (FloatSelectOP Reg Reg Type) Reg)
+(rule
+  (gen_float_select op x y ty)
+  (let
+    ((rd WritableReg (temp_writable_reg ty))
+     (tmp WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.FloatSelect op rd tmp x y ty))))
+    (writable_reg_to_reg rd)))
+
+;; Load an immediate into a register.
+(decl imm (Type u64) Reg)
+(extern constructor imm imm)
+
+;; Construct an `Imm12` from the given bits.
+(decl imm_from_bits (u64) Imm12)
+(extern constructor imm_from_bits imm_from_bits)
+
+(decl imm_from_neg_bits (i64) Imm12)
+(extern constructor imm_from_neg_bits imm_from_neg_bits)
+
+(decl imm12_from_u64 (Imm12) u64)
+(extern extractor imm12_from_u64 imm12_from_u64)
+
+(decl writable_zero_reg () WritableReg)
+(extern constructor writable_zero_reg writable_zero_reg)
+
+(decl gen_default_frm () OptionFloatRoundingMode)
+(extern constructor gen_default_frm gen_default_frm)
+
+;; Helper for emitting `MInst.FpuRR` instructions.
+(decl fpu_rr (FpuOPRR Type Reg) Reg)
+(rule (fpu_rr op ty src)
+  (let ((dst WritableReg (temp_writable_reg ty))
+        (_ Unit (emit (MInst.FpuRR op (gen_default_frm) dst src))))
+    dst))
+
+;; Helper for emitting `MInst.AluRRR` instructions.
+(decl alu_rrr (AluOPRRR Reg Reg) Reg)
+(rule (alu_rrr op src1 src2)
+  (let ((dst WritableReg (temp_writable_reg $I64))
+        (_ Unit (emit (MInst.AluRRR op dst src1 src2))))
+    dst))
+
+;; Helper for emitting rd = rs1 + rs2 for integers.
+(decl alu_add (Reg Reg) Reg)
+(rule
+  (alu_add rs1 rs2)
+  (alu_rrr (AluOPRRR.Add) rs1 rs2))
+
+(decl alu_and (Reg Reg) Reg)
+(rule
+  (alu_and rs1 rs2)
+  (alu_rrr (AluOPRRR.And) rs1 rs2))
+
+;; Helper for emitting rd = rs1 - rs2 for integers.
+(decl alu_sub (Reg Reg) Reg)
+(rule
+  (alu_sub rs1 rs2)
+  (alu_rrr (AluOPRRR.Sub) rs1 rs2))
+
+(decl pack_float_rounding_mode (FRM) OptionFloatRoundingMode)
+(extern constructor pack_float_rounding_mode pack_float_rounding_mode)
+
+;; Helper for emitting `MInst.FpuRRR` instructions.
+(decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
+(rule (fpu_rrr op ty src1 src2)
+  (let ((dst WritableReg (temp_writable_reg ty))
+        (_ Unit (emit (MInst.FpuRRR op (gen_default_frm) dst src1 src2))))
+    dst))
+
+;; Helper for emitting `MInst.FpuRRRR` instructions.
+(decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg)
+(rule (fpu_rrrr op ty src1 src2 src3)
+  (let ((dst WritableReg (temp_writable_reg ty))
+        (_ Unit (emit (MInst.FpuRRRR op (gen_default_frm) dst src1 src2 src3))))
+    dst))
+
+;; Helper for emitting `MInst.AluRRImm12` instructions.
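+;; E.g. (alu_rr_imm12 (AluOPRRI.Addi) x (imm12_const 1)) emits
+;; `addi dst, x, 1` into a fresh $I64 temporary and returns it.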
+(decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg)
+(rule (alu_rr_imm12 op src imm)
+  (let ((dst WritableReg (temp_writable_reg $I64))
+        (_ Unit (emit (MInst.AluRRImm12 op dst src imm))))
+    dst))
+
+(decl alu_andi (Reg i32) Reg)
+(rule (alu_andi r i)
+  (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const i)))
+
+(decl alu_slli (Reg i32) Reg)
+(rule (alu_slli r i)
+  (alu_rr_imm12 (AluOPRRI.Slli) r (imm12_const i)))
+
+(decl alu_srli (Reg i32) Reg)
+(rule (alu_srli r i)
+  (alu_rr_imm12 (AluOPRRI.Srli) r (imm12_const i)))
+
+;; Some instructions use the imm12 field as funct12,
+;; so we don't need the imm12 parameter.
+(decl alu_rr_funct12 (AluOPRRI Reg) Reg)
+(rule (alu_rr_funct12 op src)
+  (let ((dst WritableReg (temp_writable_reg $I64))
+        (_ Unit (emit (MInst.AluRRImm12 op dst src (imm12_zero)))))
+    dst))
+
+;; Sign- or zero-extend an integer to 64 bits if needed.
+(decl ext_int_if_need (bool ValueRegs Type) ValueRegs)
+;;; for I8, I16 and I32 ...
+(rule
+  (ext_int_if_need signed val (fits_in_32 ty))
+  (gen_extend val signed (ty_bits ty) 64))
+;;; otherwise this is an I64 or I128;
+;;; no need to extend.
+(rule
+  (ext_int_if_need _ r $I64)
+  r)
+(rule
+  (ext_int_if_need _ r $I128)
+  r)
+
+;; Helper to get the negative of an Imm12.
+(decl neg_imm12 (Imm12) Imm12)
+(extern constructor neg_imm12 neg_imm12)
+
+;; Helper to go directly from a `Value`, when it's an `iconst`, to an `Imm12`.
+(decl imm12_from_value (Imm12) Value)
+(extractor
+  (imm12_from_value n)
+  (def_inst (iconst (u64_from_imm64 (imm12_from_u64 n)))))
+
+(decl select_addi (Type) AluOPRRI)
+(rule (select_addi (fits_in_32 ty)) (AluOPRRI.Addiw))
+(rule (select_addi (fits_in_64 ty)) (AluOPRRI.Addi))
+
+(decl bnot_128 (ValueRegs) ValueRegs)
+(rule
+  (bnot_128 val)
+  (let
+    (;; low part.
+     (low Reg (gen_bit_not (value_regs_get val 0)))
+     ;; high part.
+     (high Reg (gen_bit_not (value_regs_get val 1))))
+    (value_regs low high)))
+
+(decl lower_bit_reverse (Reg Type) Reg)
+
+(rule
+  (lower_bit_reverse r $I8)
+  (gen_brev8 r $I8))
+
+(rule
+  (lower_bit_reverse r $I16)
+  (let
+    ((tmp Reg (gen_brev8 r $I16))
+     (tmp2 Reg (gen_rev8 tmp))
+     (result Reg (alu_rr_imm12 (AluOPRRI.Srli) tmp2 (imm12_const 48))))
+    result))
+
+(rule
+  (lower_bit_reverse r $I32)
+  (let
+    ((tmp Reg (gen_brev8 r $I32))
+     (tmp2 Reg (gen_rev8 tmp))
+     (result Reg (alu_rr_imm12 (AluOPRRI.Srli) tmp2 (imm12_const 32))))
+    result))
+
+(rule
+  (lower_bit_reverse r $I64)
+  (let
+    ((tmp Reg (gen_rev8 r)))
+    (gen_brev8 tmp $I64)))
+
+(decl imm12_zero () Imm12)
+(rule
+  (imm12_zero)
+  (imm12_const 0))
+
+(decl lower_ctz (Type Reg) Reg)
+(rule
+  (lower_ctz ty x)
+  (if-let $false (has_b))
+  (gen_cltz $false x ty))
+
+(rule
+  (lower_ctz $I64 x)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Ctz) x))
+
+(rule
+  (lower_ctz $I32 x)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Ctzw) x))
+
+;;;; for I8 and I16: set the bit just past the top of the narrow value
+;;;; so the count can never run off the end of it.
+(rule
+  (lower_ctz ty x)
+  (if-let $true (has_b))
+  (let
+    ((tmp Reg (alu_rr_imm12 (AluOPRRI.Bseti) x (imm12_const (ty_bits ty)))))
+    (alu_rr_funct12 (AluOPRRI.Ctzw) tmp)))
+
+(decl lower_ctz_128 (ValueRegs) ValueRegs)
+(rule
+  (lower_ctz_128 x)
+  (let
+    (;; count the low part.
+     (low Reg (lower_ctz $I64 (value_regs_get x 0)))
+     ;; count the high part.
+     (high_part Reg (lower_ctz $I64 (value_regs_get x 1)))
+     (constant_64 Reg (load_u64_constant 64))
+     (high Reg (gen_select_reg (IntCC.Equal) constant_64 low high_part (zero_reg)))
+     ;; add low and high together.
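+     ;; (`gen_select_reg` keeps the high half's count only when the low half
+     ;; was all zeros, i.e. its ctz equaled 64; otherwise it contributes 0.)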
+     (result Reg (alu_add low high)))
+    (value_regs result (zero_reg))))
+
+(convert u8 i32 u8_as_i32)
+(decl u8_as_i32 (u8) i32)
+(extern constructor u8_as_i32 u8_as_i32)
+
+(convert u8 u64 u8_as_u64)
+
+(decl lower_clz (Type Reg) Reg)
+(rule
+  (lower_clz ty rs)
+  (if-let $false (has_b))
+  (gen_cltz $true rs ty))
+(rule
+  (lower_clz $I64 r)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Clz) r))
+(rule
+  (lower_clz $I32 r)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Clzw) r))
+
+;;; for I8 and I16
+(rule
+  (lower_clz ty r)
+  (if-let $true (has_b))
+  (let
+    (;; zero-extend the narrow int so all upper bits are zero.
+     (tmp Reg (ext_int_if_need $false r ty))
+     ;; count leading zeros of the widened value.
+     (count Reg (alu_rr_funct12 (AluOPRRI.Clz) tmp))
+     ;; subtract the (64 - ty_bits) extra leading zeros we introduced.
+     (result Reg (alu_rr_imm12 (AluOPRRI.Addi) count (imm12_const_add (ty_bits ty) -64))))
+    result))
+
+;; parameters are "intcc compare_a compare_b rs1 rs2".
+(decl gen_select_reg (IntCC Reg Reg Reg Reg) Reg)
+(extern constructor gen_select_reg gen_select_reg)
+
+;; load a constant into a reg.
+(decl load_u64_constant (u64) Reg)
+(extern constructor load_u64_constant load_u64_constant)
+
+(decl lower_clz_i128 (ValueRegs) ValueRegs)
+(rule
+  (lower_clz_i128 x)
+  (let
+    (;; count the high part.
+     (high Reg (lower_clz $I64 (value_regs_get x 1)))
+     ;; count the low part.
+     (low_part Reg (lower_clz $I64 (value_regs_get x 0)))
+     ;; load constant 64.
+     (constant_64 Reg (load_u64_constant 64))
+     (low Reg (gen_select_reg (IntCC.Equal) constant_64 high low_part (zero_reg)))
+     ;; add low and high together.
+     (result Reg (alu_add high low)))
+    (value_regs result (zero_reg))))
+
+(decl gen_extend (Reg bool u8 u8) Reg)
+(rule
+  (gen_extend r is_signed from_bits to_bits)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I16))
+     (_ Unit (emit (MInst.Extend tmp r is_signed from_bits to_bits))))
+    tmp))
+
+;; val is_signed from_bits to_bits
+(decl lower_extend (Reg bool u8 u8) ValueRegs)
+(rule
+  (lower_extend r is_signed from_bits to_bits)
+  (gen_extend r is_signed from_bits to_bits))
+
+;;;; for I128 sign extension.
+(rule
+  (lower_extend r $true 64 128)
+  (let
+    ((tmp Reg (alu_rrr (AluOPRRR.Slt) r (zero_reg)))
+     (high Reg (gen_extend tmp $true 1 64)))
+    (value_regs (gen_move2 r $I64 $I64) high)))
+
+(rule
+  (lower_extend r $true from_bits 128)
+  (let
+    ((tmp Reg (gen_extend r $true from_bits 64))
+     (tmp2 Reg (alu_rrr (AluOPRRR.Slt) tmp (zero_reg)))
+     (high Reg (gen_extend tmp2 $true 1 64)))
+    (value_regs (gen_move2 r $I64 $I64) high)))
+
+;;;; for I128 zero extension.
+(rule
+  (lower_extend r $false 64 128)
+  (value_regs (gen_move2 r $I64 $I64) (zero_reg)))
+
+(rule
+  (lower_extend r $false from_bits 128)
+  (value_regs (gen_extend r $false from_bits 64) (zero_reg)))
+
+;; extract the sign bit of an integer.
+(decl ext_sign_bit (Type Reg) Reg)
+(extern constructor ext_sign_bit ext_sign_bit)
+
+(decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_b128_binary op a b)
+  (let
+    (;; low part.
+     (low Reg (alu_rrr op (value_regs_get a 0) (value_regs_get b 0)))
+     ;; high part.
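+     ;; (each 64-bit half is computed independently, which is only correct
+     ;; for bitwise ops with no cross-half carries.)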
+     (high Reg (alu_rrr op (value_regs_get a 1) (value_regs_get b 1))))
+    (value_regs low high)))
+
+(decl lower_umlhi (Type Reg Reg) Reg)
+(rule
+  (lower_umlhi $I64 rs1 rs2)
+  (alu_rrr (AluOPRRR.Mulhu) rs1 rs2))
+
+(rule
+  (lower_umlhi ty rs1 rs2)
+  (let
+    ((tmp Reg (alu_rrr (AluOPRRR.Mul) (ext_int_if_need $false rs1 ty) (ext_int_if_need $false rs2 ty))))
+    (alu_rr_imm12 (AluOPRRI.Srli) tmp (imm12_const (ty_bits ty)))))
+
+(decl lower_smlhi (Type Reg Reg) Reg)
+(rule
+  (lower_smlhi $I64 rs1 rs2)
+  (alu_rrr (AluOPRRR.Mulh) rs1 rs2))
+
+(rule
+  (lower_smlhi ty rs1 rs2)
+  (let
+    ((tmp Reg (alu_rrr (AluOPRRR.Mul) rs1 rs2)))
+    (alu_rr_imm12 (AluOPRRI.Srli) tmp (imm12_const (ty_bits ty)))))
+
+;;; is the B extension available?
+(decl pure has_b () bool)
+(extern constructor has_b has_b)
+
+(decl lower_rotl (Type Reg Reg) Reg)
+
+(rule
+  (lower_rotl $I64 rs amount)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Rol) rs amount))
+
+(rule
+  (lower_rotl $I64 rs amount)
+  (if-let $false (has_b))
+  (lower_rotl_shift $I64 rs amount))
+
+(rule
+  (lower_rotl $I32 rs amount)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Rolw) rs amount))
+
+(rule
+  (lower_rotl $I32 rs amount)
+  (if-let $false (has_b))
+  (lower_rotl_shift $I32 rs amount))
+
+(rule
+  (lower_rotl ty rs amount)
+  (lower_rotl_shift ty rs amount))
+
+;;; implement rotl with shifts.
+(decl lower_rotl_shift (Type Reg Reg) Reg)
+
+;;; for I8 and I16 ...
+(rule
+  (lower_rotl_shift ty rs amount)
+  (let
+    ((x ValueRegs (gen_shamt ty amount))
+     (shamt Reg (value_regs_get x 0))
+     (len_sub_shamt Reg (value_regs_get x 1))
+     ;;
+     (part1 Reg (alu_rrr (AluOPRRR.Sll) rs shamt))
+     ;;
+     (part2 Reg (alu_rrr (AluOPRRR.Srl) rs len_sub_shamt))
+     (part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2)))
+    (alu_rrr (AluOPRRR.Or) part1 part3)))
+
+;;;; construct the shift amount;
+;;;; returns both the shift amount and (ty_bits - shift amount).
+(decl gen_shamt (Type Reg) ValueRegs)
+(extern constructor gen_shamt gen_shamt)
+
+(decl lower_rotr (Type Reg Reg) Reg)
+
+(rule
+  (lower_rotr $I64 rs amount)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Ror) rs amount))
+(rule
+  (lower_rotr $I64 rs amount)
+  (if-let $false (has_b))
+  (lower_rotr_shift $I64 rs amount))
+
+(rule
+  (lower_rotr $I32 rs amount)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Rorw) rs amount))
+
+(rule
+  (lower_rotr $I32 rs amount)
+  (if-let $false (has_b))
+  (lower_rotr_shift $I32 rs amount))
+
+(rule
+  (lower_rotr ty rs amount)
+  (lower_rotr_shift ty rs amount))
+
+(decl lower_rotr_shift (Type Reg Reg) Reg)
+
+;;;
+(rule
+  (lower_rotr_shift ty rs amount)
+  (let
+    ((x ValueRegs (gen_shamt ty amount))
+     (shamt Reg (value_regs_get x 0))
+     (len_sub_shamt Reg (value_regs_get x 1))
+     ;;
+     (part1 Reg (alu_rrr (AluOPRRR.Srl) rs shamt))
+     ;;
+     (part2 Reg (alu_rrr (AluOPRRR.Sll) rs len_sub_shamt))
+     ;;
+     (part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2)))
+    (alu_rrr (AluOPRRR.Or) part1 part3)))
+
+(decl lower_cls (Reg Type) Reg)
+(rule
+  (lower_cls r ty)
+  (let
+    (;; sign-extend the value.
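+     ;; (cls counts the bits that are copies of the sign bit, not counting
+     ;; the sign bit itself, hence the final subtraction of 1 below.)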
+     (tmp Reg (ext_int_if_need $true r ty))
+     ;; if the value is negative, invert all bits so we can count leading zeros.
+     (tmp2 Reg (gen_select_reg (IntCC.SignedLessThan) tmp (zero_reg) (gen_bit_not r) r))
+     ;;
+     (tmp3 Reg (lower_clz ty tmp2)))
+    (alu_rr_imm12 (AluOPRRI.Addi) tmp3 (imm12_const -1))))
+
+(decl gen_cltz (bool Reg Type) Reg)
+(rule
+  (gen_cltz leading rs ty)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (step WritableReg (temp_writable_reg $I64))
+     (sum WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Cltz leading sum step tmp rs ty))))
+    (writable_reg_to_reg sum)))
+
+(decl gen_popcnt (Reg Type) Reg)
+(rule
+  (gen_popcnt rs ty)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (step WritableReg (temp_writable_reg $I64))
+     (sum WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Popcnt sum step tmp rs ty))))
+    (writable_reg_to_reg sum)))
+
+(decl lower_popcnt (Reg Type) Reg)
+(rule (lower_popcnt rs ty)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Cpop) (ext_int_if_need $false rs ty)))
+(rule (lower_popcnt rs ty)
+  (if-let $false (has_b))
+  (gen_popcnt rs ty))
+
+(decl lower_popcnt_i128 (ValueRegs) ValueRegs)
+(rule
+  (lower_popcnt_i128 a)
+  (let
+    (;; low part.
+     (low Reg (lower_popcnt (value_regs_get a 0) $I64))
+     ;; high part.
+     (high Reg (lower_popcnt (value_regs_get a 1) $I64))
+     ;; add together.
+     (result Reg (alu_add low high)))
+    (value_regs result (zero_reg))))
+
+(decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_i128_rotl x y)
+  (let
+    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
+     (shamt Reg (value_regs_get tmp 0))
+     (len_sub_shamt Reg (value_regs_get tmp 1))
+     ;;
+     (low_part1 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 0) shamt))
+     (low_part2 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 1) len_sub_shamt))
+     ;;; if shamt == 0, low_part2 is a bogus shift by 64; select zero instead.
+     (low_part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2))
+     (low Reg (alu_rrr (AluOPRRR.Or) low_part1 low_part3))
+     ;;
+     (high_part1 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) shamt))
+     (high_part2 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 0) len_sub_shamt))
+     (high_part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2))
+     (high Reg (alu_rrr (AluOPRRR.Or) high_part1 high_part3))
+     ;;
+     (const64 Reg (load_u64_constant 64)))
+    ;; the above only rotates by less than 64 bits;
+    ;; if shamt is 64 or more, swap low and high.
+    (value_regs
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low)
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high))))
+
+(decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_i128_rotr x y)
+  (let
+    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
+     (shamt Reg (value_regs_get tmp 0))
+     (len_sub_shamt Reg (value_regs_get tmp 1))
+     ;;
+     (low_part1 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 0) shamt))
+     (low_part2 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) len_sub_shamt))
+     ;;; if shamt == 0, low_part2 is a bogus shift by 64; select zero instead.
+     (low_part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part2))
+     (low Reg (alu_rrr (AluOPRRR.Or) low_part1 low_part3))
+     ;;
+     (high_part1 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 1) shamt))
+     (high_part2 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 0) len_sub_shamt))
+     (high_part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2))
+     (high Reg (alu_rrr (AluOPRRR.Or) high_part1 high_part3))
+     ;;
+     (const64 Reg (load_u64_constant 64)))
+    ;; the above only rotates by less than 64 bits;
+    ;; if shamt is 64 or more, swap low and high.
+    (value_regs
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low)
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high))))
+
+(decl lower_i128_ishl (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_i128_ishl x y)
+  (let
+    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
+     (shamt Reg (value_regs_get tmp 0))
+     (len_sub_shamt Reg (value_regs_get tmp 1))
+     ;;
+     (low Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 0) shamt))
+     ;; high part.
+     (high_part1 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 0) len_sub_shamt))
+     (high_part2 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part1))
+     ;;
+     (high_part3 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) shamt))
+     (high Reg (alu_rrr (AluOPRRR.Or) high_part2 high_part3))
+     ;;
+     (const64 Reg (load_u64_constant 64)))
+    (value_regs
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 (zero_reg) low)
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high))))
+
+(decl lower_i128_ushr (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_i128_ushr x y)
+  (let
+    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
+     (shamt Reg (value_regs_get tmp 0))
+     (len_sub_shamt Reg (value_regs_get tmp 1))
+     ;; low part.
+     (low_part1 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) len_sub_shamt))
+     (low_part2 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1))
+     ;;
+     (low_part3 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 0) shamt))
+     (low Reg (alu_rrr (AluOPRRR.Or) low_part2 low_part3))
+     ;;
+     (const64 Reg (load_u64_constant 64))
+     ;;
+     (high Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 1) shamt)))
+    (value_regs
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low)
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 (zero_reg) high))))
+
+(decl lower_i128_sshr (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (lower_i128_sshr x y)
+  (let
+    ((tmp ValueRegs (gen_shamt $I128 (value_regs_get y 0)))
+     (shamt Reg (value_regs_get tmp 0))
+     (len_sub_shamt Reg (value_regs_get tmp 1))
+     ;; low part.
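+     ;; (same scheme as ushr above: the bits shifted out of the high half
+     ;; are OR'd into the shifted low half.)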
+     (low_part1 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) len_sub_shamt))
+     (low_part2 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) low_part1))
+     ;;
+     (low_part3 Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 0) shamt))
+     (low Reg (alu_rrr (AluOPRRR.Or) low_part2 low_part3))
+     ;;
+     (const64 Reg (load_u64_constant 64))
+     ;;
+     (high Reg (alu_rrr (AluOPRRR.Sra) (value_regs_get x 1) shamt))
+     ;;
+     (const_neg_1 Reg (load_imm12 -1))
+     ;; when shifting by 64 or more, the high half is all copies of the sign bit.
+     (high_replacement Reg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg))))
+    (value_regs
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low)
+      (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high_replacement high))))
+
+(decl load_imm12 (i32) Reg)
+(rule
+  (load_imm12 x)
+  (alu_rr_imm12 (AluOPRRI.Addi) (zero_reg) (imm12_const x)))
+
+;; Always get the low register of a ValueRegs.
+;; Sometimes only the lowest bits matter, e.g. for `I8 << I128`.
+(decl valueregs_2_reg (Reg) Value)
+(extern extractor infallible valueregs_2_reg valueregs_2_reg)
+
+(decl lower_cls_i128 (ValueRegs) ValueRegs)
+(rule
+  (lower_cls_i128 x)
+  (let
+    (;;; we use clz to implement cls:
+     ;;; if the value is negative, invert all bits first.
+     (low Reg
+       (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) (gen_bit_not (value_regs_get x 0)) (value_regs_get x 0)))
+     ;;;
+     (high Reg
+       (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) (gen_bit_not (value_regs_get x 1)) (value_regs_get x 1)))
+     ;; count leading zeros.
+     (tmp ValueRegs (lower_clz_i128 (value_regs low high)))
+     (count Reg (value_regs_get tmp 0))
+     (result Reg (alu_rr_imm12 (AluOPRRI.Addi) count (imm12_const -1))))
+    (value_regs result (zero_reg))))
+
+(decl imm12_const (i32) Imm12)
+(extern constructor imm12_const imm12_const)
+
+;; construct an Imm12 from the sum of two constants.
+(decl imm12_const_add (i32 i32) Imm12)
+(extern constructor imm12_const_add imm12_const_add)
+
+(decl imm12_and (Imm12 i32) Imm12)
+(extern constructor imm12_and imm12_and)
+
+(decl gen_amode (Reg Offset32 Type) AMode)
+(extern constructor gen_amode gen_amode)
+
+(decl offset32_imm (i32) Offset32)
+(extern constructor offset32_imm offset32_imm)
+
+;; helper function to load from memory.
+(decl gen_load (Reg Offset32 LoadOP MemFlags Type) Reg)
+(rule
+  (gen_load p offset op flags ty)
+  (let
+    ((tmp WritableReg (temp_writable_reg ty))
+     (_ Unit (emit (MInst.Load tmp op flags (gen_amode p offset $I64)))))
+    tmp))
+
+(decl gen_load_128 (Reg Offset32 MemFlags) ValueRegs)
+(rule
+  (gen_load_128 p offset flags)
+  (let
+    ((low Reg (gen_load p offset (LoadOP.Ld) flags $I64))
+     (high Reg (gen_load p (offset32_add offset 8) (LoadOP.Ld) flags $I64)))
+    (value_regs low high)))
+
+(decl default_memflags () MemFlags)
+(extern constructor default_memflags default_memflags)
+
+(decl offset32_add (Offset32 i64) Offset32)
+(extern constructor offset32_add offset32_add)
+
+;; helper function to store to memory.
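+;; (stores produce no result register, so they are wrapped as side effects.)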
+(decl gen_store (Reg Offset32 StoreOP MemFlags Reg) InstOutput)
+(rule
+  (gen_store base offset op flags src)
+  (side_effect (SideEffectNoResult.Inst (MInst.Store (gen_amode base offset $I64) op flags src))))
+
+(decl gen_store_128 (Reg Offset32 MemFlags ValueRegs) InstOutput)
+(rule
+  (gen_store_128 p offset flags src)
+  (side_effect
+    (SideEffectNoResult.Inst2
+      (MInst.Store (gen_amode p offset $I64) (StoreOP.Sd) flags (value_regs_get src 0))
+      (MInst.Store (gen_amode p (offset32_add offset 8) $I64) (StoreOP.Sd) flags (value_regs_get src 1)))))
+
+(decl valid_atomic_transaction (Type) Type)
+(extern extractor valid_atomic_transaction valid_atomic_transaction)
+
+;; helper function to construct an atomic instruction.
+(decl gen_atomic (AtomicOP Reg Reg AMO) Reg)
+(rule
+  (gen_atomic op addr src amo)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Atomic op tmp addr src amo))))
+    tmp))
+
+;; helper to map a (type, AtomicRmwOp) pair to the corresponding AMO instruction.
+(decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP)
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Add))
+  (AtomicOP.AmoaddW))
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Add))
+  (AtomicOP.AmoaddD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.And))
+  (AtomicOP.AmoandW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.And))
+  (AtomicOP.AmoandD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Or))
+  (AtomicOP.AmoorW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Or))
+  (AtomicOP.AmoorD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax))
+  (AtomicOP.AmomaxW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax))
+  (AtomicOP.AmomaxD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin))
+  (AtomicOP.AmominW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin))
+  (AtomicOP.AmominD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax))
+  (AtomicOP.AmomaxuW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax))
+  (AtomicOP.AmomaxuD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin))
+  (AtomicOP.AmominuW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin))
+  (AtomicOP.AmominuD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg))
+  (AtomicOP.AmoswapW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg))
+  (AtomicOP.AmoswapD))
+
+(rule
+  (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor))
+  (AtomicOP.AmoxorW))
+
+(rule
+  (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor))
+  (AtomicOP.AmoxorD))
+
+(decl atomic_amo () AMO)
+(extern constructor atomic_amo atomic_amo)
+
+(decl gen_atomic_load (Reg Type) Reg)
+(rule
+  (gen_atomic_load p ty)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.AtomicLoad tmp ty p))))
+    (writable_reg_to_reg tmp)))
+
+(decl gen_atomic_store (Reg Type Reg) InstOutput)
+(rule
+  (gen_atomic_store p ty src)
+  (side_effect (SideEffectNoResult.Inst (MInst.AtomicStore src ty p))))
+
+;; Helper to invert all bits of a register.
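+;; RISC-V has no dedicated `not` instruction; `xori rd, rs, -1` has the same
+;; effect because the 12-bit immediate is sign-extended to all ones.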
+(decl gen_bit_not (Reg) Reg)
+(rule (gen_bit_not r)
+  (alu_rr_imm12 (AluOPRRI.Xori) r (imm12_const -1)))
+
+;; float arithmetic op
+(decl f_arithmatic_op (Type Opcode) FpuOPRRR)
+(rule
+  (f_arithmatic_op $F32 (Opcode.Fadd))
+  (FpuOPRRR.FaddS))
+
+(rule
+  (f_arithmatic_op $F64 (Opcode.Fadd))
+  (FpuOPRRR.FaddD))
+
+(rule
+  (f_arithmatic_op $F32 (Opcode.Fsub))
+  (FpuOPRRR.FsubS))
+(rule
+  (f_arithmatic_op $F64 (Opcode.Fsub))
+  (FpuOPRRR.FsubD))
+
+(rule
+  (f_arithmatic_op $F32 (Opcode.Fmul))
+  (FpuOPRRR.FmulS))
+
+(rule
+  (f_arithmatic_op $F64 (Opcode.Fmul))
+  (FpuOPRRR.FmulD))
+
+(rule
+  (f_arithmatic_op $F32 (Opcode.Fdiv))
+  (FpuOPRRR.FdivS))
+
+(rule
+  (f_arithmatic_op $F64 (Opcode.Fdiv))
+  (FpuOPRRR.FdivD))
+
+(decl move_f_to_x (Reg Type) Reg)
+(extern constructor move_f_to_x move_f_to_x)
+
+(decl move_x_to_f (Reg Type) Reg)
+(extern constructor move_x_to_f move_x_to_f)
+
+;; float copy-sign-bit op.
+(decl f_copysign_op (Type) FpuOPRRR)
+(rule (f_copysign_op $F32) (FpuOPRRR.FsgnjS))
+(rule (f_copysign_op $F64) (FpuOPRRR.FsgnjD))
+
+;; float copy-negated-sign-bit op.
+(decl f_copy_neg_sign_op (Type) FpuOPRRR)
+(rule (f_copy_neg_sign_op $F32) (FpuOPRRR.FsgnjnS))
+(rule (f_copy_neg_sign_op $F64) (FpuOPRRR.FsgnjnD))
+
+(decl fabs_copy_sign (Type) FpuOPRRR)
+(rule (fabs_copy_sign $F32) (FpuOPRRR.FsgnjxS))
+(rule (fabs_copy_sign $F64) (FpuOPRRR.FsgnjxD))
+
+(decl gen_stack_addr (StackSlot Offset32) Reg)
+(extern constructor gen_stack_addr gen_stack_addr)
+
+;; parameters are 'source register' 'in_ty' 'out_ty'.
+(decl gen_move2 (Reg Type Type) Reg)
+(extern constructor gen_move2 gen_move2)
+
+;;; generate a move and reinterpret the data.
+;; parameters are "rs" "in_type" "out_type".
+(decl gen_moves (ValueRegs Type Type) ValueRegs)
+(extern constructor gen_moves gen_moves)
+
+(decl gen_reference_check (ReferenceCheckOP Reg) Reg)
+(rule
+  (gen_reference_check op r)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.ReferenceCheck tmp op r))))
+    tmp))
+
+(decl gen_select (Type Reg ValueRegs ValueRegs) ValueRegs)
+(rule
+  (gen_select ty c x y)
+  (let
+    ((dst VecWritableReg (alloc_vec_writable ty))
+     ;;
+     (result VecWritableReg (vec_writable_clone dst))
+     (_ Unit (emit (MInst.Select dst ty c x y))))
+    (vec_writable_to_regs result)))
+
+;;; clone a VecWritableReg; otherwise the Rust compiler complains
+;;; about a use of a moved value.
+(decl vec_writable_clone (VecWritableReg) VecWritableReg)
+(extern constructor vec_writable_clone vec_writable_clone)
+
+(decl vec_writable_to_regs (VecWritableReg) ValueRegs)
+(extern constructor vec_writable_to_regs vec_writable_to_regs)
+
+(decl alloc_vec_writable (Type) VecWritableReg)
+(extern constructor alloc_vec_writable alloc_vec_writable)
+
+(decl gen_bitselect (Type Reg Reg Reg) Reg)
+(rule
+  (gen_bitselect ty c x y)
+  (let
+    ((tmp_x Reg (alu_rrr (AluOPRRR.And) c x))
+     ;;; invert the condition.
+     (c_inverse Reg (gen_bit_not c))
+     ;;; get the y part.
+     (tmp_y Reg (alu_rrr (AluOPRRR.And) c_inverse y))
+     ;;; get the result.
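+     ;;; result = (c & x) | (~c & y).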
+     (result Reg (alu_rrr (AluOPRRR.Or) tmp_x tmp_y)))
+    result))
+
+(decl gen_bint (Reg) Reg)
+(rule
+  (gen_bint r)
+  (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const 1)))
+
+(decl gen_int_select (Type IntSelectOP ValueRegs ValueRegs) ValueRegs)
+(rule
+  (gen_int_select ty op x y)
+  (let
+    ((dst VecWritableReg (alloc_vec_writable ty))
+     ;;;
+     (_ Unit (emit (MInst.IntSelect op (vec_writable_clone dst) x y ty))))
+    (vec_writable_to_regs dst)))
+
+(decl udf (TrapCode) InstOutput)
+(rule
+  (udf code)
+  (side_effect (SideEffectNoResult.Inst (MInst.Udf code))))
+
+(decl load_op (Type) LoadOP)
+(extern constructor load_op load_op)
+
+(decl store_op (Type) StoreOP)
+(extern constructor store_op store_op)
+
+;; the bool is "is_signed".
+(decl int_load_op (bool u8) LoadOP)
+(rule
+  (int_load_op $false 8)
+  (LoadOP.Lbu))
+
+(rule
+  (int_load_op $true 8)
+  (LoadOP.Lb))
+
+(rule
+  (int_load_op $false 16)
+  (LoadOP.Lhu))
+(rule
+  (int_load_op $true 16)
+  (LoadOP.Lh))
+(rule
+  (int_load_op $false 32)
+  (LoadOP.Lwu))
+(rule
+  (int_load_op $true 32)
+  (LoadOP.Lw))
+
+(rule
+  (int_load_op _ 64)
+  (LoadOP.Ld))
+
+;;;; load an external name.
+(decl load_ext_name (ExternalName i64) Reg)
+(extern constructor load_ext_name load_ext_name)
+
+(decl int_convert_2_float_op (Type bool Type) FpuOPRR)
+(extern constructor int_convert_2_float_op int_convert_2_float_op)
+
+(decl gen_fcvt_int (bool Reg bool Type Type) Reg)
+(rule
+  (gen_fcvt_int is_sat rs is_signed in_type out_type)
+  (let
+    ((result WritableReg (temp_writable_reg out_type))
+     (tmp WritableReg (temp_writable_reg $F64))
+     (_ Unit (emit (MInst.FcvtToInt is_sat result tmp rs is_signed in_type out_type))))
+    result))
+
+;;;; params: in_type out_type;
+;;;; out_type is returned.
+(decl pure valid_bextend_ty (Type Type) Type)
+(extern constructor valid_bextend_ty valid_bextend_ty)
+
+;;; some float binary operations:
+;;; 1. move the operands into x registers.
+;;; 2. do the operation.
+;;; 3. move the result back.
+(decl lower_float_binary (AluOPRRR Reg Reg Type) Reg)
+(rule
+  (lower_float_binary op rs1 rs2 ty)
+  (let
+    ((x_rs1 Reg (move_f_to_x rs1 ty))
+     (x_rs2 Reg (move_f_to_x rs2 ty))
+     ;;;
+     (tmp Reg (alu_rrr op x_rs1 x_rs2)))
+    ;;; move back.
+    (move_x_to_f tmp ty)))
+
+(decl lower_float_bnot (Reg Type) Reg)
+(rule
+  (lower_float_bnot x ty)
+  (let
+    (;; move to an x register.
+     (tmp Reg (move_f_to_x x ty))
+     ;; invert all bits.
+     (tmp2 Reg (gen_bit_not tmp)))
+    ;; move back to a float register.
+    (move_x_to_f tmp2 ty)))
+
+(decl convert_valueregs_reg (ValueRegs) Reg)
+(rule
+  (convert_valueregs_reg x)
+  (value_regs_get x 0))
+(convert ValueRegs Reg convert_valueregs_reg)
+
+;;; matches an IntCC that is neither eq nor ne, i.e. one of the ordered
+;;; comparisons (>=, <=, >, <); also returns whether it is signed.
+(decl intcc_is_gt_etc (IntCC bool) IntCC)
+(extern extractor intcc_is_gt_etc intcc_is_gt_etc)
+
+(decl intcc_is_eq_or_ne (IntCC) IntCC)
+(extern extractor intcc_is_eq_or_ne intcc_is_eq_or_ne)
+
+;;; lower icmp
+(decl lower_icmp (IntCC ValueRegs ValueRegs Type) Reg)
+;;; eq or ne.
+(rule
+  (lower_icmp (intcc_is_eq_or_ne cc) x y ty)
+  (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty))
+;;;; signed >= ...
+(rule
+  (lower_icmp (intcc_is_gt_etc cc $true) x y ty)
+  (gen_icmp cc (ext_int_if_need $true x ty) (ext_int_if_need $true y ty) ty))
+;;;; unsigned >= ...
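+;;;; (narrow operands are zero-extended first so the 64-bit compare is exact.)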
+(rule
+  (lower_icmp (intcc_is_gt_etc cc $false) x y ty)
+  (gen_icmp cc (ext_int_if_need $false x ty) (ext_int_if_need $false y ty) ty))
+
+(decl lower_icmp_over_flow (ValueRegs ValueRegs Type) Reg)
+
+;;; for I8, I16, and I32
+(rule 1
+  (lower_icmp_over_flow x y ty)
+  (let
+    ((tmp Reg (alu_sub (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
+     (tmp2 WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Extend tmp2 tmp $true (ty_bits ty) 64))))
+    (gen_icmp (IntCC.NotEqual) (writable_reg_to_reg tmp2) tmp $I64)))
+
+;;; $I64
+(rule 3
+  (lower_icmp_over_flow x y $I64)
+  (let
+    ((y_sign Reg (alu_rrr (AluOPRRR.Sgt) y (zero_reg)))
+     (sub_result Reg (alu_sub x y))
+     (tmp Reg (alu_rrr (AluOPRRR.Slt) sub_result x)))
+    (gen_icmp (IntCC.NotEqual) y_sign tmp $I64)))
+
+;;; $I128
+(rule 2
+  (lower_icmp_over_flow x y $I128)
+  (let
+    (;; x sign bit.
+     (xs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get x 1) (imm12_const 63)))
+     ;; y sign bit.
+     (ys Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get y 1) (imm12_const 63)))
+     ;;
+     (sub_result ValueRegs (i128_sub x y))
+     ;; result sign bit.
+     (rs Reg (alu_rr_imm12 (AluOPRRI.Srli) (value_regs_get sub_result 1) (imm12_const 63)))
+     ;;; xs && !ys && !rs
+     ;;; x is negative, y is positive, and the result is positive:
+     ;;; must have overflowed.
+     (tmp1 Reg (alu_and xs (alu_and (gen_bit_not ys) (gen_bit_not rs))))
+     ;;; !xs && ys && rs
+     ;;; x is positive, y is negative, and the result is negative:
+     ;;; overflow.
+     (tmp2 Reg (alu_and (gen_bit_not xs) (alu_and ys rs)))
+     ;;; tmp3
+     (tmp3 Reg (alu_rrr (AluOPRRR.Or) tmp1 tmp2)))
+    (gen_extend tmp3 $true 1 64)))
+
+(decl i128_sub (ValueRegs ValueRegs) ValueRegs)
+(rule
+  (i128_sub x y)
+  (let
+    (;; low part.
+     (low Reg (alu_rrr (AluOPRRR.Sub) (value_regs_get x 0) (value_regs_get y 0)))
+     ;; compute the borrow.
+     (borrow Reg (alu_rrr (AluOPRRR.SltU) (value_regs_get x 0) low))
+     ;;
+     (high_tmp Reg (alu_rrr (AluOPRRR.Sub) (value_regs_get x 1) (value_regs_get y 1)))
+     ;;
+     (high Reg (alu_rrr (AluOPRRR.Sub) high_tmp borrow)))
+    (value_regs low high)))
+
+(decl gen_fabs (Reg Type) Reg)
+(rule
+  (gen_fabs x ty)
+  (fpu_rrr (fabs_copy_sign ty) ty x x))
+
+;;; right now this only returns whether the addition overflowed.
+(decl lower_uadd_overflow (Reg Reg Type) Reg)
+
+(rule
+  (lower_uadd_overflow x y $I64)
+  (let
+    ((tmp Reg (alu_add x y)))
+    (gen_icmp (IntCC.UnsignedLessThan) tmp x $I64)))
+
+(rule
+  (lower_uadd_overflow x y (fits_in_32 ty))
+  (let
+    ((tmp_x Reg (ext_int_if_need $false x ty))
+     (tmp_y Reg (ext_int_if_need $false y ty))
+     (sum Reg (alu_add tmp_x tmp_y)))
+    (alu_srli sum (ty_bits ty))))
+
+(decl inst_output_get (InstOutput u8) ValueRegs)
+(extern constructor inst_output_get inst_output_get)
+
+(decl label_to_br_target (MachLabel) BranchTarget)
+(extern constructor label_to_br_target label_to_br_target)
+
+(decl gen_jump (MachLabel) MInst)
+(rule
+  (gen_jump v)
+  (MInst.Jal (label_to_br_target v)))
+
+(decl vec_label_get (VecMachLabel u8) MachLabel)
+(extern constructor vec_label_get vec_label_get)
+
+(decl lower_branch (Inst VecMachLabel) InstOutput)
+(rule (lower_branch (jump _ _) targets)
+  (side_effect (SideEffectNoResult.Inst (gen_jump (vec_label_get targets 0)))))
+
+;;; cc a b targets Type
+(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) InstOutput)
+(extern constructor lower_br_icmp lower_br_icmp)
+
+(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) InstOutput)
+(extern constructor lower_br_fcmp lower_br_fcmp)
+
+;; integer scalar zero regs.
+(decl int_zero_reg (Type) ValueRegs)
+(extern constructor int_zero_reg int_zero_reg)
+
+(decl lower_brz_or_nz (IntCC ValueRegs VecMachLabel Type) InstOutput)
+(extern constructor lower_brz_or_nz lower_brz_or_nz)
+
+(rule
+  (lower_branch (brz v @ (value_type ty) _ _) targets)
+  (lower_brz_or_nz (IntCC.Equal) v targets ty))
+
+(rule
+  (lower_branch (brnz v @ (value_type ty) _ _) targets)
+  (lower_brz_or_nz (IntCC.NotEqual) v targets ty))
+
+(rule
+  (lower_branch (br_icmp cc a @ (value_type ty) b _ _) targets)
+  (lower_br_icmp cc a b targets ty))
+
+(rule
+  (lower_branch (brif cc (ifcmp a @ (value_type ty) b) _ _) targets)
+  (lower_br_icmp cc a b targets ty))
+
+(rule
+  (lower_branch (brff cc (ffcmp a @ (value_type ty) b) _ _) targets)
+  (lower_br_fcmp cc a b targets ty))
+
+(decl lower_br_table (Reg VecMachLabel) InstOutput)
+(extern constructor lower_br_table lower_br_table)
+
+(rule
+  (lower_branch (br_table index _ _) targets)
+  (lower_br_table index targets))
+
+(decl x_reg (u8) Reg)
+(extern constructor x_reg x_reg)
+
+(decl load_ra () Reg)
+(extern constructor load_ra load_ra)
+
+;;; rs1 & ~rs2
+(decl gen_andn (Reg Reg) Reg)
+(rule
+  (gen_andn rs1 rs2)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Andn) rs1 rs2))
+
+(rule
+  (gen_andn rs1 rs2)
+  (if-let $false (has_b))
+  (let
+    ((tmp Reg (gen_bit_not rs2)))
+    (alu_and rs1 tmp)))
+
+;;; rs1 | ~rs2
+(decl gen_orn (Reg Reg) Reg)
+(rule
+  (gen_orn rs1 rs2)
+  (if-let $true (has_b))
+  (alu_rrr (AluOPRRR.Orn) rs1 rs2))
+
+(rule
+  (gen_orn rs1 rs2)
+  (if-let $false (has_b))
+  (let
+    ((tmp Reg (gen_bit_not rs2)))
+    (alu_rrr (AluOPRRR.Or) rs1 tmp)))
+
+(decl gen_rev8 (Reg) Reg)
+(rule
+  (gen_rev8 rs)
+  (if-let $true (has_b))
+  (alu_rr_funct12 (AluOPRRI.Rev8) rs))
+
+(rule
+  (gen_rev8 rs)
+  (if-let $false (has_b))
+  (let
+    ((rd WritableReg (temp_writable_reg $I64))
+     (tmp WritableReg (temp_writable_reg $I64))
+     (step WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Rev8 rs step tmp rd))))
+    (writable_reg_to_reg rd)))
+
+(decl pure has_zbkb () bool)
+(extern constructor has_zbkb has_zbkb)
+
+(decl gen_brev8 (Reg Type) Reg)
+(rule
+  (gen_brev8 rs _)
+  (if-let $true (has_zbkb))
+  (alu_rr_funct12 (AluOPRRI.Brev8) rs))
+(rule
+  (gen_brev8 rs ty)
+  (if-let $false (has_zbkb))
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64))
+     (tmp2 WritableReg (temp_writable_reg $I64))
+     (step WritableReg (temp_writable_reg $I64))
+     (rd WritableReg (temp_writable_reg $I64))
+     (_ Unit (emit (MInst.Brev8 rs ty step tmp tmp2 rd))))
+    (writable_reg_to_reg rd)))
+
+;;; x ^ ~y
+(decl gen_xor_not (Reg Reg) Reg)
+(rule
+  (gen_xor_not x y)
+  (let
+    ((tmp Reg (gen_bit_not y)))
+    (alu_rrr (AluOPRRR.Xor) x tmp)))
+
+(decl lower_iabs (Reg Type) Reg)
+(rule
+  (lower_iabs r ty)
+  (let
+    ((tmp Reg (ext_int_if_need $true r ty))
+     ;; negate r via two's complement: ~r + 1.
+     (a Reg (gen_bit_not r))
+     (a2 Reg (alu_rr_imm12 (AluOPRRI.Addi) a (imm12_const 1))))
+    (gen_select_reg (IntCC.SignedLessThan) r (zero_reg) a2 r)))
+
+(decl output_ifcout (Reg) InstOutput)
+(rule (output_ifcout reg)
+  (output_pair reg (value_regs_invalid)))
+
+(decl gen_trapff (FloatCC Reg Reg Type TrapCode) InstOutput)
+(rule
+  (gen_trapff cc a b ty trap_code)
+  (let
+    ((tmp WritableReg (temp_writable_reg $I64)))
+    (side_effect (SideEffectNoResult.Inst (MInst.TrapFf cc a b ty tmp trap_code)))))
+
+(decl gen_trapif (Reg TrapCode) InstOutput)
+(rule
+  (gen_trapif test trap_code)
+  (side_effect (SideEffectNoResult.Inst (MInst.TrapIf test trap_code))))
+
+(decl gen_trapifc (IntCC Reg Reg TrapCode) InstOutput)
+(rule
+  (gen_trapifc cc a b trap_code)
+  (side_effect
+    (SideEffectNoResult.Inst (MInst.TrapIfC a b cc trap_code))))
+
+(decl shift_int_to_most_significant (Reg Type) Reg)
+(extern constructor shift_int_to_most_significant shift_int_to_most_significant)
+
+;;; generate a check for signed division overflow (INT_MIN / -1).
+(decl gen_div_overflow (Reg Reg Type) InstOutput)
+(rule
+  (gen_div_overflow rs1 rs2 ty)
+  (let
+    ((r_const_neg_1 Reg (load_imm12 -1))
+     (r_const_min Reg (alu_slli (load_imm12 1) 63))
+     (tmp_rs1 Reg (shift_int_to_most_significant rs1 ty))
+     (t1 Reg (gen_icmp (IntCC.Equal) r_const_neg_1 rs2 ty))
+     (t2 Reg (gen_icmp (IntCC.Equal) r_const_min tmp_rs1 ty))
+     (test Reg (alu_and t1 t2)))
+    (gen_trapif test (TrapCode.IntegerOverflow))))
+
+(decl gen_div_by_zero (Reg) InstOutput)
+(rule
+  (gen_div_by_zero r)
+  (gen_trapifc (IntCC.Equal) (zero_reg) r (TrapCode.IntegerDivisionByZero)))
+
+;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
+(extern constructor gen_call gen_call)
+
+(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput)
+(extern constructor gen_call_indirect gen_call_indirect)
+
+;;; this imitates the aarch64 `madd` instruction.
+(decl madd (Reg Reg Reg) Reg)
+(rule
+  (madd n m a)
+  (let
+    ((t Reg (alu_rrr (AluOPRRR.Mul) n m)))
+    (alu_add t a)))
+
+(decl umulh (Reg Reg) Reg)
+(rule (umulh a b)
+  (alu_rrr (AluOPRRR.Mulhu) a b))
\ No newline at end of file
diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs
new file mode 100644
index 000000000000..afb440080c49
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs
@@ -0,0 +1,1972 @@
+//! Riscv64 ISA definitions: instruction arguments.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+use super::*;
+use crate::ir::condcodes::{CondCode, FloatCC};
+
+use crate::isa::riscv64::inst::{reg_name, reg_to_gpr_num};
+use crate::machinst::isle::WritableReg;
+
+use std::fmt::{Display, Formatter, Result};
+
+/// An addressing mode specified for a load/store operation.
+#[derive(Clone, Debug, Copy)]
+pub enum AMode {
+    /// Arbitrary offset from a register. Converted to generation of large
+    /// offsets with multiple instructions as necessary during code emission.
+    RegOffset(Reg, i64, Type),
+    /// Offset from the stack pointer.
+    SPOffset(i64, Type),
+
+    /// Offset from the frame pointer.
+    FPOffset(i64, Type),
+
+    /// Offset from the "nominal stack pointer", which is where the real SP is
+    /// just after stack and spill slots are allocated in the function prologue.
+    /// At emission time, this is converted to `SPOffset` with a fixup added to
+    /// the offset constant. The fixup is a running value that is tracked as
+    /// emission iterates through instructions in linear order, and can be
+    /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+    ///
+    /// The standard ABI is in charge of handling this (by emitting the
+    /// adjustment meta-instructions). It maintains the invariant that "nominal
+    /// SP" is where the actual SP is after the function prologue and before
+    /// clobber pushes. See the diagram in the documentation for
+    /// [crate::isa::riscv64::abi](the ABI module) for more details.
+    NominalSPOffset(i64, Type),
+}
+
+impl AMode {
+    pub(crate) fn reg_offset(reg: Reg, imm: i64, ty: Type) -> AMode {
+        AMode::RegOffset(reg, imm, ty)
+    }
+
+    pub(crate) fn get_base_register(&self) -> Reg {
+        match self {
+            &AMode::RegOffset(reg, ..) => reg,
+            &AMode::SPOffset(..) => stack_reg(),
+            &AMode::FPOffset(..) => fp_reg(),
+            &AMode::NominalSPOffset(..) => stack_reg(),
+        }
+    }
+
+    pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 {
+        match self {
+            &AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset,
+            _ => self.get_offset(),
+        }
+    }
+
+    fn get_offset(&self) -> i64 {
+        match self {
+            &AMode::RegOffset(_, offset, ..) => offset,
+            &AMode::SPOffset(offset, _) => offset,
+            &AMode::FPOffset(offset, _) => offset,
+            &AMode::NominalSPOffset(offset, _) => offset,
+        }
+    }
+
+    pub(crate) fn to_string_with_alloc(&self, allocs: &mut AllocationConsumer<'_>) -> String {
+        let reg = self.get_base_register();
+        let next = allocs.next(reg);
+        let offset = self.get_offset();
+        match self {
+            &AMode::NominalSPOffset(..) => format!("{}", self),
+            _ => format!("{}({})", offset, reg_name(next)),
+        }
+    }
+
+    pub(crate) fn to_addr(&self, allocs: &mut AllocationConsumer<'_>) -> String {
+        let reg = self.get_base_register();
+        let next = allocs.next(reg);
+        let offset = self.get_offset();
+        match self {
+            &AMode::NominalSPOffset(..) => format!("nsp{:+}", offset),
+            _ => format!("{}{:+}", reg_name(next), offset),
+        }
+    }
+}
+
+impl Display for AMode {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        match self {
+            &AMode::RegOffset(r, offset, ..) => {
+                write!(f, "{}({:?})", offset, r)
+            }
+            &AMode::SPOffset(offset, ..) => {
+                write!(f, "{}(sp)", offset)
+            }
+            &AMode::NominalSPOffset(offset, ..) => {
+                write!(f, "{}(nominal_sp)", offset)
+            }
+            &AMode::FPOffset(offset, ..) => {
+                write!(f, "{}(fp)", offset)
+            }
+        }
+    }
+}
+
+impl Into<AMode> for StackAMode {
+    fn into(self) -> AMode {
+        match self {
+            StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty),
+            StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty),
+            StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty),
+        }
+    }
+}
+
+/// RISC-V branches always compare two registers; `brz` can compare against
+/// the zero register, which always reads as 0.
+#[derive(Clone, Copy, Debug)]
+pub struct IntegerCompare {
+    pub(crate) kind: IntCC,
+    pub(crate) rs1: Reg,
+    pub(crate) rs2: Reg,
+}
+
+pub(crate) enum BranchFunct3 {
+    // ==
+    Eq,
+    // !=
+    Ne,
+    // signed <
+    Lt,
+    // signed >=
+    Ge,
+    // unsigned <
+    Ltu,
+    // unsigned >=
+    Geu,
+}
+
+impl BranchFunct3 {
+    pub(crate) fn funct3(self) -> u32 {
+        match self {
+            BranchFunct3::Eq => 0b000,
+            BranchFunct3::Ne => 0b001,
+            BranchFunct3::Lt => 0b100,
+            BranchFunct3::Ge => 0b101,
+            BranchFunct3::Ltu => 0b110,
+            BranchFunct3::Geu => 0b111,
+        }
+    }
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            BranchFunct3::Eq => "eq",
+            BranchFunct3::Ne => "ne",
+            BranchFunct3::Lt => "lt",
+            BranchFunct3::Ge => "ge",
+            BranchFunct3::Ltu => "ltu",
+            BranchFunct3::Geu => "geu",
+        }
+    }
+}
+impl IntegerCompare {
+    pub(crate) fn op_code(self) -> u32 {
+        0b1100011
+    }
+
+    // Returns the funct3 encoding and whether rs1/rs2 must be swapped
+    // (e.g. `a > b` is encoded as `b < a`).
+    pub(crate) fn funct3(&self) -> (BranchFunct3, bool) {
+        match self.kind {
+            IntCC::Equal => (BranchFunct3::Eq, false),
+            IntCC::NotEqual => (BranchFunct3::Ne, false),
+            IntCC::SignedLessThan => (BranchFunct3::Lt, false),
+            IntCC::SignedGreaterThanOrEqual => (BranchFunct3::Ge, false),
+
+            IntCC::SignedGreaterThan => (BranchFunct3::Lt, true),
+            IntCC::SignedLessThanOrEqual => (BranchFunct3::Ge, true),
+
+            IntCC::UnsignedLessThan => (BranchFunct3::Ltu, false),
+            IntCC::UnsignedGreaterThanOrEqual => (BranchFunct3::Geu, false),
+
+            IntCC::UnsignedGreaterThan => (BranchFunct3::Ltu, true),
+            IntCC::UnsignedLessThanOrEqual => (BranchFunct3::Geu, true),
+        }
+    }
+
+    #[inline]
+    pub(crate) fn op_name(&self) -> &'static str {
+        match self.kind {
+            IntCC::Equal => "beq",
+            IntCC::NotEqual => "bne",
+            IntCC::SignedLessThan => "blt",
+            IntCC::SignedGreaterThanOrEqual => "bge",
+            IntCC::SignedGreaterThan => "bgt",
+            IntCC::SignedLessThanOrEqual => "ble",
+            IntCC::UnsignedLessThan => "bltu",
+            IntCC::UnsignedGreaterThanOrEqual => "bgeu",
+            IntCC::UnsignedGreaterThan => "bgtu",
+            IntCC::UnsignedLessThanOrEqual => "bleu",
+        }
+    }
+
+    pub(crate) fn emit(self) -> u32 {
+        let (funct3, reverse) = self.funct3();
+        let (rs1, rs2) = if reverse {
+            (self.rs2, self.rs1)
+        } else {
+            (self.rs1, self.rs2)
+        };
+
+        self.op_code()
+            | funct3.funct3() << 12
+            | reg_to_gpr_num(rs1) << 15
+            | reg_to_gpr_num(rs2) << 20
+    }
+
+    pub(crate) fn inverse(self) -> Self {
+        Self {
+            kind: self.kind.inverse(),
+            ..self
+        }
+    }
+}
+
+impl FpuOPRRRR {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            Self::FmaddS => "fmadd.s",
+            Self::FmsubS => "fmsub.s",
+            Self::FnmsubS => "fnmsub.s",
+            Self::FnmaddS => "fnmadd.s",
+            Self::FmaddD => "fmadd.d",
+            Self::FmsubD => "fmsub.d",
+            Self::FnmsubD => "fnmsub.d",
+            Self::FnmaddD => "fnmadd.d",
+        }
+    }
+
+    pub(crate) fn funct2(self) -> u32 {
+        match self {
+            FpuOPRRRR::FmaddS | FpuOPRRRR::FmsubS | FpuOPRRRR::FnmsubS | FpuOPRRRR::FnmaddS => 0,
+            FpuOPRRRR::FmaddD | FpuOPRRRR::FmsubD | FpuOPRRRR::FnmsubD | FpuOPRRRR::FnmaddD => 1,
+        }
+    }
+
+    pub(crate) fn funct3(self, rounding_mode: Option<FRM>) -> u32 {
+        rounding_mode.unwrap_or_default().as_u32()
+    }
+
+    pub(crate) fn op_code(self) -> u32 {
+        match self {
+            FpuOPRRRR::FmaddS => 0b1000011,
+            FpuOPRRRR::FmsubS => 0b1000111,
+            FpuOPRRRR::FnmsubS => 0b1001011,
+            FpuOPRRRR::FnmaddS => 0b1001111,
+            FpuOPRRRR::FmaddD => 0b1000011,
+            FpuOPRRRR::FmsubD => 0b1000111,
+            FpuOPRRRR::FnmsubD => 0b1001011,
+            FpuOPRRRR::FnmaddD => 0b1001111,
+        }
+    }
+}
+
+impl FpuOPRR {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            Self::FsqrtS => "fsqrt.s",
+            Self::FcvtWS => "fcvt.w.s",
+            Self::FcvtWuS => "fcvt.wu.s",
+            Self::FmvXW => "fmv.x.w",
+            Self::FclassS => "fclass.s",
+            Self::FcvtSw => "fcvt.s.w",
+            Self::FcvtSwU => "fcvt.s.wu",
+            Self::FmvWX => "fmv.w.x",
+            Self::FcvtLS => "fcvt.l.s",
+            Self::FcvtLuS => "fcvt.lu.s",
+            Self::FcvtSL => "fcvt.s.l",
+            Self::FcvtSLU => "fcvt.s.lu",
+            Self::FcvtLD => "fcvt.l.d",
+            Self::FcvtLuD => "fcvt.lu.d",
+            Self::FmvXD => "fmv.x.d",
+            Self::FcvtDL => "fcvt.d.l",
+            Self::FcvtDLu => "fcvt.d.lu",
+            Self::FmvDX => "fmv.d.x",
+            Self::FsqrtD => "fsqrt.d",
+            Self::FcvtSD => "fcvt.s.d",
+            Self::FcvtDS => "fcvt.d.s",
+            Self::FclassD => "fclass.d",
+            Self::FcvtWD => "fcvt.w.d",
+            Self::FcvtWuD => "fcvt.wu.d",
+            Self::FcvtDW => "fcvt.d.w",
+            Self::FcvtDWU => "fcvt.d.wu",
+        }
+    }
+
+    pub(crate) fn is_convert_to_int(self) -> bool {
+        match self {
+            Self::FcvtWS
+            | Self::FcvtWuS
+            | Self::FcvtLS
+            | Self::FcvtLuS
+            | Self::FcvtWD
+            | Self::FcvtWuD
+            | Self::FcvtLD
+            | Self::FcvtLuD => true,
+            _ => false,
+        }
+    }
+    // move from an x register to a float register.
+    pub(crate) fn move_x_to_f_op(ty: Type) -> Self {
+        match ty {
+            F32 => Self::FmvWX,
+            F64 => Self::FmvDX,
+            _ => unreachable!("ty:{:?}", ty),
+        }
+    }
+
+    // move from a float register to an x register.
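+    // (`fmv.x.w`/`fmv.x.d` transfer the raw bit pattern; no numeric
+    // conversion is performed.)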
+ pub(crate) fn move_f_to_x_op(ty: Type) -> Self { + match ty { + F32 => Self::FmvXW, + F64 => Self::FmvXD, + _ => unreachable!("ty:{:?}", ty), + } + } + + pub(crate) fn float_convert_2_int_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = to.bits() == 32; + match from { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtWS + } else { + Self::FcvtLS + } + } else { + if type_32 { + Self::FcvtWuS + } else { + Self::FcvtLuS + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtWD + } else { + Self::FcvtLD + } + } else { + if type_32 { + Self::FcvtWuD + } else { + Self::FcvtLuD + } + } + } + _ => unreachable!("from type:{}", from), + } + } + + pub(crate) fn int_convert_2_float_op(from: Type, is_type_signed: bool, to: Type) -> Self { + let type_32 = from.bits() == 32; + match to { + F32 => { + if is_type_signed { + if type_32 { + Self::FcvtSw + } else { + Self::FcvtSL + } + } else { + if type_32 { + Self::FcvtSwU + } else { + Self::FcvtSLU + } + } + } + F64 => { + if is_type_signed { + if type_32 { + Self::FcvtDW + } else { + Self::FcvtDL + } + } else { + if type_32 { + Self::FcvtDWU + } else { + Self::FcvtDLu + } + } + } + _ => unreachable!("to type:{}", to), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + FpuOPRR::FsqrtS + | FpuOPRR::FcvtWS + | FpuOPRR::FcvtWuS + | FpuOPRR::FmvXW + | FpuOPRR::FclassS + | FpuOPRR::FcvtSw + | FpuOPRR::FcvtSwU + | FpuOPRR::FmvWX => 0b1010011, + + FpuOPRR::FcvtLS | FpuOPRR::FcvtLuS | FpuOPRR::FcvtSL | FpuOPRR::FcvtSLU => 0b1010011, + + FpuOPRR::FcvtLD + | FpuOPRR::FcvtLuD + | FpuOPRR::FmvXD + | FpuOPRR::FcvtDL + | FpuOPRR::FcvtDLu + | FpuOPRR::FmvDX => 0b1010011, + + FpuOPRR::FsqrtD + | FpuOPRR::FcvtSD + | FpuOPRR::FcvtDS + | FpuOPRR::FclassD + | FpuOPRR::FcvtWD + | FpuOPRR::FcvtWuD + | FpuOPRR::FcvtDW + | FpuOPRR::FcvtDWU => 0b1010011, + } + } + + pub(crate) fn rs2_funct5(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b00000, + FpuOPRR::FcvtWS => 0b00000, + FpuOPRR::FcvtWuS => 0b00001, + FpuOPRR::FmvXW => 0b00000, + FpuOPRR::FclassS => 0b00000, + FpuOPRR::FcvtSw => 0b00000, + FpuOPRR::FcvtSwU => 0b00001, + FpuOPRR::FmvWX => 0b00000, + FpuOPRR::FcvtLS => 0b00010, + FpuOPRR::FcvtLuS => 0b00011, + FpuOPRR::FcvtSL => 0b00010, + FpuOPRR::FcvtSLU => 0b00011, + FpuOPRR::FcvtLD => 0b00010, + FpuOPRR::FcvtLuD => 0b00011, + FpuOPRR::FmvXD => 0b00000, + FpuOPRR::FcvtDL => 0b00010, + FpuOPRR::FcvtDLu => 0b00011, + FpuOPRR::FmvDX => 0b00000, + FpuOPRR::FcvtSD => 0b00001, + FpuOPRR::FcvtDS => 0b00000, + FpuOPRR::FclassD => 0b00000, + FpuOPRR::FcvtWD => 0b00000, + FpuOPRR::FcvtWuD => 0b00001, + FpuOPRR::FcvtDW => 0b00000, + FpuOPRR::FcvtDWU => 0b00001, + FpuOPRR::FsqrtD => 0b00000, + } + } + pub(crate) fn funct7(self) -> u32 { + match self { + FpuOPRR::FsqrtS => 0b0101100, + FpuOPRR::FcvtWS => 0b1100000, + FpuOPRR::FcvtWuS => 0b1100000, + FpuOPRR::FmvXW => 0b1110000, + FpuOPRR::FclassS => 0b1110000, + FpuOPRR::FcvtSw => 0b1101000, + FpuOPRR::FcvtSwU => 0b1101000, + FpuOPRR::FmvWX => 0b1111000, + FpuOPRR::FcvtLS => 0b1100000, + FpuOPRR::FcvtLuS => 0b1100000, + FpuOPRR::FcvtSL => 0b1101000, + FpuOPRR::FcvtSLU => 0b1101000, + FpuOPRR::FcvtLD => 0b1100001, + FpuOPRR::FcvtLuD => 0b1100001, + FpuOPRR::FmvXD => 0b1110001, + FpuOPRR::FcvtDL => 0b1101001, + FpuOPRR::FcvtDLu => 0b1101001, + FpuOPRR::FmvDX => 0b1111001, + FpuOPRR::FcvtSD => 0b0100000, + FpuOPRR::FcvtDS => 0b0100001, + FpuOPRR::FclassD => 0b1110001, + FpuOPRR::FcvtWD => 0b1100001, + FpuOPRR::FcvtWuD => 0b1100001, + FpuOPRR::FcvtDW => 0b1101001, + 
+            FpuOPRR::FcvtDWU => 0b1101001,
+            FpuOPRR::FsqrtD => 0b0101101,
+        }
+    }
+
+    pub(crate) fn funct3(self, rounding_mode: Option<FRM>) -> u32 {
+        let rounding_mode = rounding_mode.unwrap_or_default().as_u32();
+        match self {
+            FpuOPRR::FsqrtS => rounding_mode,
+            FpuOPRR::FcvtWS => rounding_mode,
+            FpuOPRR::FcvtWuS => rounding_mode,
+            FpuOPRR::FmvXW => 0b000,
+            FpuOPRR::FclassS => 0b001,
+            FpuOPRR::FcvtSw => rounding_mode,
+            FpuOPRR::FcvtSwU => rounding_mode,
+            FpuOPRR::FmvWX => 0b000,
+            FpuOPRR::FcvtLS => rounding_mode,
+            FpuOPRR::FcvtLuS => rounding_mode,
+            FpuOPRR::FcvtSL => rounding_mode,
+            FpuOPRR::FcvtSLU => rounding_mode,
+            FpuOPRR::FcvtLD => rounding_mode,
+            FpuOPRR::FcvtLuD => rounding_mode,
+            FpuOPRR::FmvXD => 0b000,
+            FpuOPRR::FcvtDL => rounding_mode,
+            FpuOPRR::FcvtDLu => rounding_mode,
+            FpuOPRR::FmvDX => 0b000,
+            FpuOPRR::FcvtSD => rounding_mode,
+            FpuOPRR::FcvtDS => rounding_mode,
+            FpuOPRR::FclassD => 0b001,
+            FpuOPRR::FcvtWD => rounding_mode,
+            FpuOPRR::FcvtWuD => rounding_mode,
+            FpuOPRR::FcvtDW => rounding_mode,
+            FpuOPRR::FcvtDWU => 0b000,
+            FpuOPRR::FsqrtD => rounding_mode,
+        }
+    }
+}
+
+impl FpuOPRRR {
+    pub(crate) const fn op_name(self) -> &'static str {
+        match self {
+            Self::FaddS => "fadd.s",
+            Self::FsubS => "fsub.s",
+            Self::FmulS => "fmul.s",
+            Self::FdivS => "fdiv.s",
+            Self::FsgnjS => "fsgnj.s",
+            Self::FsgnjnS => "fsgnjn.s",
+            Self::FsgnjxS => "fsgnjx.s",
+            Self::FminS => "fmin.s",
+            Self::FmaxS => "fmax.s",
+            Self::FeqS => "feq.s",
+            Self::FltS => "flt.s",
+            Self::FleS => "fle.s",
+            Self::FaddD => "fadd.d",
+            Self::FsubD => "fsub.d",
+            Self::FmulD => "fmul.d",
+            Self::FdivD => "fdiv.d",
+            Self::FsgnjD => "fsgnj.d",
+            Self::FsgnjnD => "fsgnjn.d",
+            Self::FsgnjxD => "fsgnjx.d",
+            Self::FminD => "fmin.d",
+            Self::FmaxD => "fmax.d",
+            Self::FeqD => "feq.d",
+            Self::FltD => "flt.d",
+            Self::FleD => "fle.d",
+        }
+    }
+
+    pub fn funct3(self, rounding_mode: Option<FRM>) -> u32 {
+        let rounding_mode = rounding_mode.unwrap_or_default();
+        let rounding_mode = rounding_mode.as_u32();
+        match self {
+            Self::FaddS => rounding_mode,
+            Self::FsubS => rounding_mode,
+            Self::FmulS => rounding_mode,
+            Self::FdivS => rounding_mode,
+
+            Self::FsgnjS => 0b000,
+            Self::FsgnjnS => 0b001,
+            Self::FsgnjxS => 0b010,
+            Self::FminS => 0b000,
+            Self::FmaxS => 0b001,
+
+            Self::FeqS => 0b010,
+            Self::FltS => 0b001,
+            Self::FleS => 0b000,
+
+            Self::FaddD => rounding_mode,
+            Self::FsubD => rounding_mode,
+            Self::FmulD => rounding_mode,
+            Self::FdivD => rounding_mode,
+
+            Self::FsgnjD => 0b000,
+            Self::FsgnjnD => 0b001,
+            Self::FsgnjxD => 0b010,
+            Self::FminD => 0b000,
+            Self::FmaxD => 0b001,
+            Self::FeqD => 0b010,
+            Self::FltD => 0b001,
+            Self::FleD => 0b000,
+        }
+    }
+
+    pub fn op_code(self) -> u32 {
+        match self {
+            Self::FaddS
+            | Self::FsubS
+            | Self::FmulS
+            | Self::FdivS
+            | Self::FsgnjS
+            | Self::FsgnjnS
+            | Self::FsgnjxS
+            | Self::FminS
+            | Self::FmaxS
+            | Self::FeqS
+            | Self::FltS
+            | Self::FleS => 0b1010011,
+
+            Self::FaddD
+            | Self::FsubD
+            | Self::FmulD
+            | Self::FdivD
+            | Self::FsgnjD
+            | Self::FsgnjnD
+            | Self::FsgnjxD
+            | Self::FminD
+            | Self::FmaxD
+            | Self::FeqD
+            | Self::FltD
+            | Self::FleD => 0b1010011,
+        }
+    }
+
+    pub const fn funct7(self) -> u32 {
+        match self {
+            Self::FaddS => 0b0000000,
+            Self::FsubS => 0b0000100,
+            Self::FmulS => 0b0001000,
+            Self::FdivS => 0b0001100,
+
+            Self::FsgnjS => 0b0010000,
+            Self::FsgnjnS => 0b0010000,
+            Self::FsgnjxS => 0b0010000,
+            Self::FminS => 0b0010100,
+            Self::FmaxS => 0b0010100,
+            Self::FeqS => 0b1010000,
+            Self::FltS => 0b1010000,
+            Self::FleS =>
0b1010000, + + Self::FaddD => 0b0000001, + Self::FsubD => 0b0000101, + Self::FmulD => 0b0001001, + Self::FdivD => 0b0001101, + Self::FsgnjD => 0b0010001, + Self::FsgnjnD => 0b0010001, + Self::FsgnjxD => 0b0010001, + Self::FminD => 0b0010101, + Self::FmaxD => 0b0010101, + Self::FeqD => 0b1010001, + Self::FltD => 0b1010001, + Self::FleD => 0b1010001, + } + } + pub fn is_32(self) -> bool { + match self { + Self::FaddS + | Self::FsubS + | Self::FmulS + | Self::FdivS + | Self::FsgnjS + | Self::FsgnjnS + | Self::FsgnjxS + | Self::FminS + | Self::FmaxS + | Self::FeqS + | Self::FltS + | Self::FleS => true, + _ => false, + } + } + + pub fn is_copy_sign(self) -> bool { + match self { + Self::FsgnjD | Self::FsgnjS => true, + _ => false, + } + } + + pub fn is_copy_neg_sign(self) -> bool { + match self { + Self::FsgnjnD | Self::FsgnjnS => true, + _ => false, + } + } + pub fn is_copy_xor_sign(self) -> bool { + match self { + Self::FsgnjxS | Self::FsgnjxD => true, + _ => false, + } + } +} +impl AluOPRRR { + pub(crate) const fn op_name(self) -> &'static str { + match self { + Self::Add => "add", + Self::Sub => "sub", + Self::Sll => "sll", + Self::Slt => "slt", + Self::Sgt => "sgt", + Self::SltU => "sltu", + Self::Sgtu => "sgtu", + Self::Xor => "xor", + Self::Srl => "srl", + Self::Sra => "sra", + Self::Or => "or", + Self::And => "and", + Self::Addw => "addw", + Self::Subw => "subw", + Self::Sllw => "sllw", + Self::Srlw => "srlw", + Self::Sraw => "sraw", + Self::Mul => "mul", + Self::Mulh => "mulh", + Self::Mulhsu => "mulhsu", + Self::Mulhu => "mulhu", + Self::Div => "div", + Self::DivU => "divu", + Self::Rem => "rem", + Self::RemU => "remu", + Self::Mulw => "mulw", + Self::Divw => "divw", + Self::Divuw => "divuw", + Self::Remw => "remw", + Self::Remuw => "remuw", + Self::Adduw => "add.uw", + Self::Andn => "andn", + Self::Bclr => "bclr", + Self::Bext => "bext", + Self::Binv => "binv", + Self::Bset => "bset", + Self::Clmul => "clmul", + Self::Clmulh => "clmulh", + Self::Clmulr => "clmulr", + Self::Max => "max", + Self::Maxu => "maxu", + Self::Min => "min", + Self::Minu => "minu", + Self::Orn => "orn", + Self::Rol => "rol", + Self::Rolw => "rolw", + Self::Ror => "ror", + Self::Rorw => "rorw", + Self::Sh1add => "sh1add", + Self::Sh1adduw => "sh1add.uw", + Self::Sh2add => "sh2add", + Self::Sh2adduw => "sh2add.uw", + Self::Sh3add => "sh3add", + Self::Sh3adduw => "sh3add.uw", + Self::Xnor => "xnor", + } + } + + pub fn funct3(self) -> u32 { + match self { + AluOPRRR::Add => 0b000, + AluOPRRR::Sll => 0b001, + AluOPRRR::Slt => 0b010, + AluOPRRR::Sgt => 0b010, + AluOPRRR::SltU => 0b011, + AluOPRRR::Sgtu => 0b011, + AluOPRRR::Xor => 0b100, + AluOPRRR::Srl => 0b101, + AluOPRRR::Sra => 0b101, + AluOPRRR::Or => 0b110, + AluOPRRR::And => 0b111, + AluOPRRR::Sub => 0b000, + + AluOPRRR::Addw => 0b000, + AluOPRRR::Subw => 0b000, + AluOPRRR::Sllw => 0b001, + AluOPRRR::Srlw => 0b101, + AluOPRRR::Sraw => 0b101, + + AluOPRRR::Mul => 0b000, + AluOPRRR::Mulh => 0b001, + AluOPRRR::Mulhsu => 0b010, + AluOPRRR::Mulhu => 0b011, + AluOPRRR::Div => 0b100, + AluOPRRR::DivU => 0b101, + AluOPRRR::Rem => 0b110, + AluOPRRR::RemU => 0b111, + + AluOPRRR::Mulw => 0b000, + AluOPRRR::Divw => 0b100, + AluOPRRR::Divuw => 0b101, + AluOPRRR::Remw => 0b110, + AluOPRRR::Remuw => 0b111, + + AluOPRRR::Adduw => 0b000, + AluOPRRR::Andn => 0b111, + AluOPRRR::Bclr => 0b001, + AluOPRRR::Bext => 0b101, + AluOPRRR::Binv => 0b001, + AluOPRRR::Bset => 0b001, + AluOPRRR::Clmul => 0b001, + AluOPRRR::Clmulh => 0b011, + AluOPRRR::Clmulr => 0b010, + AluOPRRR::Max => 
0b110, + AluOPRRR::Maxu => 0b111, + AluOPRRR::Min => 0b100, + AluOPRRR::Minu => 0b101, + AluOPRRR::Orn => 0b110, + AluOPRRR::Rol => 0b001, + AluOPRRR::Rolw => 0b001, + AluOPRRR::Ror => 0b101, + AluOPRRR::Rorw => 0b101, + AluOPRRR::Sh1add => 0b010, + AluOPRRR::Sh1adduw => 0b010, + AluOPRRR::Sh2add => 0b100, + AluOPRRR::Sh2adduw => 0b100, + AluOPRRR::Sh3add => 0b110, + AluOPRRR::Sh3adduw => 0b110, + AluOPRRR::Xnor => 0b100, + } + } + + pub fn op_code(self) -> u32 { + match self { + AluOPRRR::Add + | AluOPRRR::Sub + | AluOPRRR::Sll + | AluOPRRR::Slt + | AluOPRRR::Sgt + | AluOPRRR::SltU + | AluOPRRR::Sgtu + | AluOPRRR::Xor + | AluOPRRR::Srl + | AluOPRRR::Sra + | AluOPRRR::Or + | AluOPRRR::And => 0b0110011, + + AluOPRRR::Addw | AluOPRRR::Subw | AluOPRRR::Sllw | AluOPRRR::Srlw | AluOPRRR::Sraw => { + 0b0111011 + } + + AluOPRRR::Mul + | AluOPRRR::Mulh + | AluOPRRR::Mulhsu + | AluOPRRR::Mulhu + | AluOPRRR::Div + | AluOPRRR::DivU + | AluOPRRR::Rem + | AluOPRRR::RemU => 0b0110011, + + AluOPRRR::Mulw + | AluOPRRR::Divw + | AluOPRRR::Divuw + | AluOPRRR::Remw + | AluOPRRR::Remuw => 0b0111011, + + AluOPRRR::Adduw => 0b0111011, + AluOPRRR::Andn + | AluOPRRR::Bclr + | AluOPRRR::Bext + | AluOPRRR::Binv + | AluOPRRR::Bset + | AluOPRRR::Clmul + | AluOPRRR::Clmulh + | AluOPRRR::Clmulr + | AluOPRRR::Max + | AluOPRRR::Maxu + | AluOPRRR::Min + | AluOPRRR::Minu + | AluOPRRR::Orn + | AluOPRRR::Rol + | AluOPRRR::Ror + | AluOPRRR::Sh1add + | AluOPRRR::Sh2add + | AluOPRRR::Sh3add + | AluOPRRR::Xnor => 0b0110011, + + AluOPRRR::Rolw + | AluOPRRR::Rorw + | AluOPRRR::Sh2adduw + | AluOPRRR::Sh3adduw + | AluOPRRR::Sh1adduw => 0b0111011, + } + } + + pub const fn funct7(self) -> u32 { + match self { + AluOPRRR::Add => 0b0000000, + AluOPRRR::Sub => 0b0100000, + AluOPRRR::Sll => 0b0000000, + AluOPRRR::Slt => 0b0000000, + AluOPRRR::Sgt => 0b0000000, + AluOPRRR::SltU => 0b0000000, + AluOPRRR::Sgtu => 0b0000000, + + AluOPRRR::Xor => 0b0000000, + AluOPRRR::Srl => 0b0000000, + AluOPRRR::Sra => 0b0100000, + AluOPRRR::Or => 0b0000000, + AluOPRRR::And => 0b0000000, + + AluOPRRR::Addw => 0b0000000, + AluOPRRR::Subw => 0b0100000, + AluOPRRR::Sllw => 0b0000000, + AluOPRRR::Srlw => 0b0000000, + AluOPRRR::Sraw => 0b0100000, + + AluOPRRR::Mul => 0b0000001, + AluOPRRR::Mulh => 0b0000001, + AluOPRRR::Mulhsu => 0b0000001, + AluOPRRR::Mulhu => 0b0000001, + AluOPRRR::Div => 0b0000001, + AluOPRRR::DivU => 0b0000001, + AluOPRRR::Rem => 0b0000001, + AluOPRRR::RemU => 0b0000001, + + AluOPRRR::Mulw => 0b0000001, + AluOPRRR::Divw => 0b0000001, + AluOPRRR::Divuw => 0b0000001, + AluOPRRR::Remw => 0b0000001, + AluOPRRR::Remuw => 0b0000001, + AluOPRRR::Adduw => 0b0000100, + AluOPRRR::Andn => 0b0100000, + AluOPRRR::Bclr => 0b0100100, + AluOPRRR::Bext => 0b0100100, + AluOPRRR::Binv => 0b0110100, + AluOPRRR::Bset => 0b0010100, + AluOPRRR::Clmul => 0b0000101, + AluOPRRR::Clmulh => 0b0000101, + AluOPRRR::Clmulr => 0b0000101, + AluOPRRR::Max => 0b0000101, + AluOPRRR::Maxu => 0b0000101, + AluOPRRR::Min => 0b0000101, + AluOPRRR::Minu => 0b0000101, + AluOPRRR::Orn => 0b0100000, + AluOPRRR::Rol => 0b0110000, + AluOPRRR::Rolw => 0b0110000, + AluOPRRR::Ror => 0b0110000, + AluOPRRR::Rorw => 0b0110000, + AluOPRRR::Sh1add => 0b0010000, + AluOPRRR::Sh1adduw => 0b0010000, + AluOPRRR::Sh2add => 0b0010000, + AluOPRRR::Sh2adduw => 0b0010000, + AluOPRRR::Sh3add => 0b0010000, + AluOPRRR::Sh3adduw => 0b0010000, + AluOPRRR::Xnor => 0b0100000, + } + } + + pub(crate) fn reverse_rs(self) -> bool { + // special case. + // sgt and sgtu is not defined in isa. 
+    // emission must swap rs1 and rs2.
+    pub(crate) fn reverse_rs(self) -> bool {
+        self == AluOPRRR::Sgt || self == AluOPRRR::Sgtu
+    }
+}
+
+impl AluOPRRI {
+    pub(crate) fn option_funct6(self) -> Option<u32> {
+        let x: Option<u32> = match self {
+            Self::Slli => Some(0b00_0000),
+            Self::Srli => Some(0b00_0000),
+            Self::Srai => Some(0b01_0000),
+            Self::Bclri => Some(0b010010),
+            Self::Bexti => Some(0b010010),
+            Self::Binvi => Some(0b011010),
+            Self::Bseti => Some(0b001010),
+            Self::Rori => Some(0b011000),
+            Self::SlliUw => Some(0b000010),
+            _ => None,
+        };
+        x
+    }
+
+    pub(crate) fn option_funct7(self) -> Option<u32> {
+        let x = match self {
+            Self::Slliw => Some(0b000_0000),
+            Self::SrliW => Some(0b000_0000),
+            Self::Sraiw => Some(0b010_0000),
+            Self::Roriw => Some(0b0110000),
+            _ => None,
+        };
+        x
+    }
+
+    pub(crate) fn imm12(self, imm12: Imm12) -> u32 {
+        let x = imm12.as_u32();
+        if let Some(func) = self.option_funct6() {
+            func << 6 | (x & 0b11_1111)
+        } else if let Some(func) = self.option_funct7() {
+            func << 5 | (x & 0b1_1111)
+        } else if let Some(func) = self.option_funct12() {
+            func
+        } else {
+            x
+        }
+    }
+
+    pub(crate) fn option_funct12(self) -> Option<u32> {
+        match self {
+            Self::Clz => Some(0b011000000000),
+            Self::Clzw => Some(0b011000000000),
+            Self::Cpop => Some(0b011000000010),
+            Self::Cpopw => Some(0b011000000010),
+            Self::Ctz => Some(0b011000000001),
+            Self::Ctzw => Some(0b011000000001),
+            Self::Rev8 => Some(0b011010111000),
+            Self::Sextb => Some(0b011000000100),
+            Self::Sexth => Some(0b011000000101),
+            Self::Zexth => Some(0b000010000000),
+            Self::Orcb => Some(0b001010000111),
+            Self::Brev8 => Some(0b0110_1000_0111),
+            _ => None,
+        }
+    }
+
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            Self::Addi => "addi",
+            Self::Slti => "slti",
+            Self::SltiU => "sltiu",
+            Self::Xori => "xori",
+            Self::Ori => "ori",
+            Self::Andi => "andi",
+            Self::Slli => "slli",
+            Self::Srli => "srli",
+            Self::Srai => "srai",
+            Self::Addiw => "addiw",
+            Self::Slliw => "slliw",
+            Self::SrliW => "srliw",
+            Self::Sraiw => "sraiw",
+            Self::Bclri => "bclri",
+            Self::Bexti => "bexti",
+            Self::Binvi => "binvi",
+            Self::Bseti => "bseti",
+            Self::Rori => "rori",
+            Self::Roriw => "roriw",
+            Self::SlliUw => "slli.uw",
+            Self::Clz => "clz",
+            Self::Clzw => "clzw",
+            Self::Cpop => "cpop",
+            Self::Cpopw => "cpopw",
+            Self::Ctz => "ctz",
+            Self::Ctzw => "ctzw",
+            Self::Rev8 => "rev8",
+            Self::Sextb => "sext.b",
+            Self::Sexth => "sext.h",
+            Self::Zexth => "zext.h",
+            Self::Orcb => "orc.b",
+            Self::Brev8 => "brev8",
+        }
+    }
+
+    pub fn funct3(self) -> u32 {
+        match self {
+            AluOPRRI::Addi => 0b000,
+            AluOPRRI::Slti => 0b010,
+            AluOPRRI::SltiU => 0b011,
+            AluOPRRI::Xori => 0b100,
+            AluOPRRI::Ori => 0b110,
+            AluOPRRI::Andi => 0b111,
+            AluOPRRI::Slli => 0b001,
+            AluOPRRI::Srli => 0b101,
+            AluOPRRI::Srai => 0b101,
+            AluOPRRI::Addiw => 0b000,
+            AluOPRRI::Slliw => 0b001,
+            AluOPRRI::SrliW => 0b101,
+            AluOPRRI::Sraiw => 0b101,
+            AluOPRRI::Bclri => 0b001,
+            AluOPRRI::Bexti => 0b101,
+            AluOPRRI::Binvi => 0b001,
+            AluOPRRI::Bseti => 0b001,
+            AluOPRRI::Rori => 0b101,
+            AluOPRRI::Roriw => 0b101,
+            AluOPRRI::SlliUw => 0b001,
+            AluOPRRI::Clz => 0b001,
+            AluOPRRI::Clzw => 0b001,
+            AluOPRRI::Cpop => 0b001,
+            AluOPRRI::Cpopw => 0b001,
+            AluOPRRI::Ctz => 0b001,
+            AluOPRRI::Ctzw => 0b001,
+            AluOPRRI::Rev8 => 0b101,
+            AluOPRRI::Sextb => 0b001,
+            AluOPRRI::Sexth => 0b001,
+            AluOPRRI::Zexth => 0b100,
+            AluOPRRI::Orcb => 0b101,
+            AluOPRRI::Brev8 => 0b101,
+        }
+    }
+
+    pub fn op_code(self) -> u32 {
+        match self {
+            AluOPRRI::Addi
+            | AluOPRRI::Slti
+            | AluOPRRI::SltiU
+            | AluOPRRI::Xori
| AluOPRRI::Ori + | AluOPRRI::Andi + | AluOPRRI::Slli + | AluOPRRI::Srli + | AluOPRRI::Srai + | AluOPRRI::Bclri + | AluOPRRI::Bexti + | AluOPRRI::Binvi + | AluOPRRI::Bseti + | AluOPRRI::Rori + | AluOPRRI::Clz + | AluOPRRI::Cpop + | AluOPRRI::Ctz + | AluOPRRI::Rev8 + | AluOPRRI::Sextb + | AluOPRRI::Sexth + | AluOPRRI::Orcb + | AluOPRRI::Brev8 => 0b0010011, + + AluOPRRI::Addiw + | AluOPRRI::Slliw + | AluOPRRI::SrliW + | AluOPRRI::Sraiw + | AluOPRRI::Roriw + | AluOPRRI::SlliUw + | AluOPRRI::Clzw + | AluOPRRI::Cpopw + | AluOPRRI::Ctzw => 0b0011011, + AluOPRRI::Zexth => 0b0111011, + } + } +} + +impl Default for FRM { + fn default() -> Self { + Self::Fcsr + } +} + +/// float rounding mode. +impl FRM { + pub(crate) fn to_static_str(self) -> &'static str { + match self { + FRM::RNE => "rne", + FRM::RTZ => "rtz", + FRM::RDN => "rdn", + FRM::RUP => "rup", + FRM::RMM => "rmm", + FRM::Fcsr => "fcsr", + } + } + + #[inline] + pub(crate) fn bits(self) -> u8 { + match self { + FRM::RNE => 0b000, + FRM::RTZ => 0b001, + FRM::RDN => 0b010, + FRM::RUP => 0b011, + FRM::RMM => 0b100, + FRM::Fcsr => 0b111, + } + } + pub(crate) fn as_u32(self) -> u32 { + self.bits() as u32 + } +} + +impl FFlagsException { + #[inline] + pub(crate) fn mask(self) -> u32 { + match self { + FFlagsException::NV => 1 << 4, + FFlagsException::DZ => 1 << 3, + FFlagsException::OF => 1 << 2, + FFlagsException::UF => 1 << 1, + FFlagsException::NX => 1 << 0, + } + } +} + +impl LoadOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Lb => "lb", + Self::Lh => "lh", + Self::Lw => "lw", + Self::Lbu => "lbu", + Self::Lhu => "lhu", + Self::Lwu => "lwu", + Self::Ld => "ld", + Self::Flw => "flw", + Self::Fld => "fld", + } + } + + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Flw } else { Self::Fld }; + } + match t { + B1 | B8 => Self::Lbu, + B16 => Self::Lhu, + B32 | R32 => Self::Lwu, + B64 | R64 | I64 => Self::Ld, + + I8 => Self::Lb, + I16 => Self::Lh, + I32 => Self::Lw, + _ => unreachable!(), + } + } + + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Lb | Self::Lh | Self::Lw | Self::Lbu | Self::Lhu | Self::Lwu | Self::Ld => { + 0b0000011 + } + Self::Flw | Self::Fld => 0b0000111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Lb => 0b000, + Self::Lh => 0b001, + Self::Lw => 0b010, + Self::Lwu => 0b110, + Self::Lbu => 0b100, + Self::Lhu => 0b101, + Self::Ld => 0b011, + Self::Flw => 0b010, + Self::Fld => 0b011, + } + } +} + +impl StoreOP { + pub(crate) fn op_name(self) -> &'static str { + match self { + Self::Sb => "sb", + Self::Sh => "sh", + Self::Sw => "sw", + Self::Sd => "sd", + Self::Fsw => "fsw", + Self::Fsd => "fsd", + } + } + pub(crate) fn from_type(t: Type) -> Self { + if t.is_float() { + return if t == F32 { Self::Fsw } else { Self::Fsd }; + } + match t.bits() { + 1 | 8 => Self::Sb, + 16 => Self::Sh, + 32 => Self::Sw, + 64 => Self::Sd, + _ => unreachable!(), + } + } + pub(crate) fn op_code(self) -> u32 { + match self { + Self::Sb | Self::Sh | Self::Sw | Self::Sd => 0b0100011, + Self::Fsw | Self::Fsd => 0b0100111, + } + } + pub(crate) fn funct3(self) -> u32 { + match self { + Self::Sb => 0b000, + Self::Sh => 0b001, + Self::Sw => 0b010, + Self::Sd => 0b011, + Self::Fsw => 0b010, + Self::Fsd => 0b011, + } + } +} + +impl FClassResult { + pub(crate) const fn bit(self) -> u32 { + match self { + FClassResult::NegInfinite => 1 << 0, + FClassResult::NegNormal => 1 << 1, + FClassResult::NegSubNormal => 1 << 2, + FClassResult::NegZero => 1 << 3, + 
+            FClassResult::PosZero => 1 << 4,
+            FClassResult::PosSubNormal => 1 << 5,
+            FClassResult::PosNormal => 1 << 6,
+            FClassResult::PosInfinite => 1 << 7,
+            FClassResult::SNaN => 1 << 8,
+            FClassResult::QNaN => 1 << 9,
+        }
+    }
+
+    #[inline]
+    pub(crate) const fn is_nan_bits() -> u32 {
+        Self::SNaN.bit() | Self::QNaN.bit()
+    }
+    #[inline]
+    pub(crate) fn is_zero_bits() -> u32 {
+        Self::NegZero.bit() | Self::PosZero.bit()
+    }
+
+    #[inline]
+    pub(crate) fn is_infinite_bits() -> u32 {
+        Self::PosInfinite.bit() | Self::NegInfinite.bit()
+    }
+}
+
+/// Condition-code bits for comparing floating-point numbers.
+/// These bits are used when lowering the fcmp instruction. Two IEEE
+/// floating-point values relate in exactly one of four ways:
+/// UN - unordered: either value is NaN.
+/// EQ - equal numerical value.
+/// LT - x is less than y.
+/// GT - x is greater than y.
+#[derive(Clone, Copy)]
+pub struct FloatCCArgs(pub(crate) u8);
+
+impl FloatCCArgs {
+    // unordered
+    pub(crate) const UN: u8 = 1 << 0;
+    // equal
+    pub(crate) const EQ: u8 = 1 << 1;
+    // less than
+    pub(crate) const LT: u8 = 1 << 2;
+    // greater than
+    pub(crate) const GT: u8 = 1 << 3;
+    // not equal
+    pub(crate) const NE: u8 = 1 << 4;
+
+    /// Mask bits for a `FloatCC`.
+    pub(crate) fn from_floatcc<T: Into<FloatCC>>(t: T) -> Self {
+        let x = match t.into() {
+            FloatCC::Ordered => Self::EQ | Self::LT | Self::GT,
+            FloatCC::Unordered => Self::UN,
+            FloatCC::Equal => Self::EQ,
+            FloatCC::NotEqual => Self::NE,
+            FloatCC::OrderedNotEqual => Self::LT | Self::GT,
+            FloatCC::UnorderedOrEqual => Self::UN | Self::EQ,
+            FloatCC::LessThan => Self::LT,
+            FloatCC::LessThanOrEqual => Self::LT | Self::EQ,
+            FloatCC::GreaterThan => Self::GT,
+            FloatCC::GreaterThanOrEqual => Self::GT | Self::EQ,
+            FloatCC::UnorderedOrLessThan => Self::UN | Self::LT,
+            FloatCC::UnorderedOrLessThanOrEqual => Self::UN | Self::LT | Self::EQ,
+            FloatCC::UnorderedOrGreaterThan => Self::UN | Self::GT,
+            FloatCC::UnorderedOrGreaterThanOrEqual => Self::UN | Self::GT | Self::EQ,
+        };
+
+        Self(x)
+    }
+
+    #[inline]
+    pub(crate) fn has(&self, other: u8) -> bool {
+        (self.0 & other) == other
+    }
+
+    /// If all of `other`'s bits are set, clear them and return true.
+    pub(crate) fn has_and_clear(&mut self, other: u8) -> bool {
+        if !self.has(other) {
+            return false;
+        }
+        self.clear_bits(other);
+        true
+    }
+
+    #[inline]
+    fn clear_bits(&mut self, c: u8) {
+        self.0 &= !c;
+    }
+}
+
+impl AtomicOP {
+    #[inline]
+    pub(crate) fn is_load(self) -> bool {
+        match self {
+            Self::LrW | Self::LrD => true,
+            _ => false,
+        }
+    }
+
+    #[inline]
+    pub(crate) fn op_name(self, amo: AMO) -> String {
+        let s = match self {
+            Self::LrW => "lr.w",
+            Self::ScW => "sc.w",
+
+            Self::AmoswapW => "amoswap.w",
+            Self::AmoaddW => "amoadd.w",
+            Self::AmoxorW => "amoxor.w",
+            Self::AmoandW => "amoand.w",
+            Self::AmoorW => "amoor.w",
+            Self::AmominW => "amomin.w",
+            Self::AmomaxW => "amomax.w",
+            Self::AmominuW => "amominu.w",
+            Self::AmomaxuW => "amomaxu.w",
+            Self::LrD => "lr.d",
+            Self::ScD => "sc.d",
+            Self::AmoswapD => "amoswap.d",
+            Self::AmoaddD => "amoadd.d",
+            Self::AmoxorD => "amoxor.d",
+            Self::AmoandD => "amoand.d",
+            Self::AmoorD => "amoor.d",
+            Self::AmominD => "amomin.d",
+            Self::AmomaxD => "amomax.d",
+            Self::AmominuD => "amominu.d",
+            Self::AmomaxuD => "amomaxu.d",
+        };
+        format!("{}{}", s, amo.to_static_str())
+    }
+    #[inline]
+    pub(crate) fn op_code(self) -> u32 {
+        0b0101111
+    }
+
+    #[inline]
+    pub(crate) fn funct7(self, amo: AMO) -> u32 {
+        self.funct5() << 2 | amo.as_u32() & 0b11
+    }
+
+    pub(crate) fn funct3(self) -> u32 {
+        match self {
+            AtomicOP::LrW
+            | AtomicOP::ScW
+            | AtomicOP::AmoswapW
+            | AtomicOP::AmoaddW
+            | AtomicOP::AmoxorW
+            | AtomicOP::AmoandW
+            | AtomicOP::AmoorW
+            | AtomicOP::AmominW
+            | AtomicOP::AmomaxW
+            | AtomicOP::AmominuW
+            | AtomicOP::AmomaxuW => 0b010,
+            AtomicOP::LrD
+            | AtomicOP::ScD
+            | AtomicOP::AmoswapD
+            | AtomicOP::AmoaddD
+            | AtomicOP::AmoxorD
+            | AtomicOP::AmoandD
+            | AtomicOP::AmoorD
+            | AtomicOP::AmominD
+            | AtomicOP::AmomaxD
+            | AtomicOP::AmominuD
+            | AtomicOP::AmomaxuD => 0b011,
+        }
+    }
+    pub(crate) fn funct5(self) -> u32 {
+        match self {
+            AtomicOP::LrW => 0b00010,
+            AtomicOP::ScW => 0b00011,
+            AtomicOP::AmoswapW => 0b00001,
+            AtomicOP::AmoaddW => 0b00000,
+            AtomicOP::AmoxorW => 0b00100,
+            AtomicOP::AmoandW => 0b01100,
+            AtomicOP::AmoorW => 0b01000,
+            AtomicOP::AmominW => 0b10000,
+            AtomicOP::AmomaxW => 0b10100,
+            AtomicOP::AmominuW => 0b11000,
+            AtomicOP::AmomaxuW => 0b11100,
+            AtomicOP::LrD => 0b00010,
+            AtomicOP::ScD => 0b00011,
+            AtomicOP::AmoswapD => 0b00001,
+            AtomicOP::AmoaddD => 0b00000,
+            AtomicOP::AmoxorD => 0b00100,
+            AtomicOP::AmoandD => 0b01100,
+            AtomicOP::AmoorD => 0b01000,
+            AtomicOP::AmominD => 0b10000,
+            AtomicOP::AmomaxD => 0b10100,
+            AtomicOP::AmominuD => 0b11000,
+            AtomicOP::AmomaxuD => 0b11100,
+        }
+    }
+
+    pub(crate) fn load_op(t: Type) -> Self {
+        if t.bits() <= 32 {
+            Self::LrW
+        } else {
+            Self::LrD
+        }
+    }
+    pub(crate) fn store_op(t: Type) -> Self {
+        if t.bits() <= 32 {
+            Self::ScW
+        } else {
+            Self::ScD
+        }
+    }
+
+    /// Extract the `ty`-sized subword at the bit offset in `offset` from `rs`
+    /// into `rd`, zero-extended to 64 bits.
+    pub(crate) fn extract(rd: WritableReg, offset: Reg, rs: Reg, ty: Type) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Srl,
+            rd: rd,
+            rs1: rs,
+            rs2: offset,
+        });
+        insts.push(Inst::Extend {
+            rd: rd,
+            rn: rd.to_reg(),
+            signed: false,
+            from_bits: ty.bits() as u8,
+            to_bits: 64,
+        });
+        insts
+    }
+
+    /// Like `extract`, but sign-extends the value; suitable for signed ops
+    /// such as imax.
+    pub(crate) fn extract_sext(
+        rd: WritableReg,
+        offset: Reg,
+        rs: Reg,
+        ty: Type,
+    ) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Srl,
+            rd: rd,
+            rs1: rs,
+            rs2: offset,
+        });
+        insts.push(Inst::Extend {
+            rd: rd,
+            rn: rd.to_reg(),
+            signed: true,
+            from_bits: ty.bits() as u8,
+            to_bits: 64,
+        });
+        insts
+    }
+
+    /// Clear the `ty`-sized bit range at `offset` in `rd`, using `tmp` as a
+    /// scratch register.
+    pub(crate) fn unset(
+        rd: WritableReg,
+        tmp: WritableReg,
+        offset: Reg,
+        ty: Type,
+    ) -> SmallInstVec<Inst> {
+        assert!(rd != tmp);
+        let mut insts = SmallInstVec::new();
+        insts.extend(Inst::load_int_mask(tmp, ty));
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Sll,
+            rd: tmp,
+            rs1: tmp.to_reg(),
+            rs2: offset,
+        });
+        insts.push(Inst::construct_bit_not(tmp, tmp.to_reg()));
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::And,
+            rd: rd,
+            rs1: rd.to_reg(),
+            rs2: tmp.to_reg(),
+        });
+        insts
+    }
+
+    /// Shift the zero-extended value in `rs` up to `offset` and or it into
+    /// `rd`, using `tmp` as a scratch register.
+    pub(crate) fn set(
+        rd: WritableReg,
+        tmp: WritableReg,
+        offset: Reg,
+        rs: Reg,
+        ty: Type,
+    ) -> SmallInstVec<Inst> {
+        assert!(rd != tmp);
+        let mut insts = SmallInstVec::new();
+        // zero-extend rs into tmp.
+        insts.push(Inst::Extend {
+            rd: tmp,
+            rn: rs,
+            signed: false,
+            from_bits: ty.bits() as u8,
+            to_bits: 64,
+        });
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Sll,
+            rd: tmp,
+            rs1: tmp.to_reg(),
+            rs2: offset,
+        });
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Or,
+            rd: rd,
+            rs1: rd.to_reg(),
+            rs2: tmp.to_reg(),
+        });
+        insts
+    }
+
+    /// Merge the subword value in `rs` into `rd` at bit offset `offset`.
+    /// The caller must ensure that the rest of the word is already in `rd`.
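+    /// For example, a subword atomic rmw first loads the containing aligned
+    /// word, then uses `merge` to splice the new subword value in at
+    /// `offset` before attempting the store-conditional.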
+    pub(crate) fn merge(
+        rd: WritableReg,
+        tmp: WritableReg,
+        offset: Reg,
+        rs: Reg,
+        ty: Type,
+    ) -> SmallInstVec<Inst> {
+        let mut insts = Self::unset(rd, tmp, offset, ty);
+        insts.extend(Self::set(rd, tmp, offset, rs, ty));
+        insts
+    }
+}
+
+impl IntSelectOP {
+    #[inline]
+    pub(crate) fn from_ir_op(op: crate::ir::Opcode) -> Self {
+        match op {
+            crate::ir::Opcode::Imax => Self::Imax,
+            crate::ir::Opcode::Umax => Self::Umax,
+            crate::ir::Opcode::Imin => Self::Imin,
+            crate::ir::Opcode::Umin => Self::Umin,
+            _ => unreachable!(),
+        }
+    }
+    #[inline]
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            IntSelectOP::Imax => "imax",
+            IntSelectOP::Umax => "umax",
+            IntSelectOP::Imin => "imin",
+            IntSelectOP::Umin => "umin",
+        }
+    }
+    #[inline]
+    pub(crate) fn to_int_cc(self) -> IntCC {
+        match self {
+            IntSelectOP::Imax => IntCC::SignedGreaterThan,
+            IntSelectOP::Umax => IntCC::UnsignedGreaterThan,
+            IntSelectOP::Imin => IntCC::SignedLessThan,
+            IntSelectOP::Umin => IntCC::UnsignedLessThan,
+        }
+    }
+}
+
+impl ReferenceCheckOP {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            ReferenceCheckOP::IsNull => "is_null",
+            ReferenceCheckOP::IsInvalid => "is_invalid",
+        }
+    }
+    #[inline]
+    pub(crate) fn from_ir_op(op: crate::ir::Opcode) -> Self {
+        match op {
+            crate::ir::Opcode::IsInvalid => Self::IsInvalid,
+            crate::ir::Opcode::IsNull => Self::IsNull,
+            _ => unreachable!(),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub enum CsrAddress {
+    Fcsr = 0x3,
+    Vstart = 0x8,
+    Vxsat = 0x9,
+    Vxrm = 0xa,
+    Vcsr = 0xf,
+    Vl = 0xc20,
+    Vtype = 0xc21,
+    Vlenb = 0xc22,
+}
+
+impl std::fmt::Debug for CsrAddress {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "0x{:x}", self.as_u32())
+    }
+}
+
+impl Display for CsrAddress {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "0x{:x}", self.as_u32())
+    }
+}
+impl CsrAddress {
+    pub(crate) fn as_u32(self) -> u32 {
+        self as u32
+    }
+}
+
+pub(crate) struct VType {
+    vma: bool,
+    vta: bool,
+    vsew: Vsew,
+    vlmul: Vlmul,
+}
+
+impl VType {
+    fn as_u32(self) -> u32 {
+        self.vlmul.as_u32()
+            | self.vsew.as_u32() << 3
+            | if self.vta { 1 << 7 } else { 0 }
+            | if self.vma { 1 << 8 } else { 0 }
+    }
+
+    const fn vill_bit() -> u64 {
+        1 << 63
+    }
+}
+
+enum Vlmul {
+    vlmul_1_div_8 = 0b101,
+    vlmul_1_div_4 = 0b110,
+    vlmul_1_div_2 = 0b111,
+    vlmul_1 = 0b000,
+    vlmul_2 = 0b001,
+    vlmul_4 = 0b010,
+    vlmul_8 = 0b011,
+}
+
+impl Vlmul {
+    fn as_u32(self) -> u32 {
+        self as u32
+    }
+}
+
+enum Vsew {
+    sew_8 = 0b000,
+    sew_16 = 0b001,
+    sew_32 = 0b010,
+    sew_64 = 0b011,
+}
+
+impl Vsew {
+    fn as_u32(self) -> u32 {
+        self as u32
+    }
+}
+
+impl CsrOP {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            CsrOP::Csrrw => "csrrw",
+            CsrOP::Csrrs => "csrrs",
+            CsrOP::Csrrc => "csrrc",
+            CsrOP::Csrrwi => "csrrwi",
+            CsrOP::Csrrsi => "csrrsi",
+            CsrOP::Csrrci => "csrrci",
+        }
+    }
+
+    pub(crate) const fn need_rs(self) -> bool {
+        match self {
+            CsrOP::Csrrw | CsrOP::Csrrs | CsrOP::Csrrc => true,
+            _ => false,
+        }
+    }
+    pub(crate) const fn op_code(self) -> u32 {
+        0b1110011
+    }
+
+    pub(crate) fn funct3(self) -> u32 {
+        match self {
+            CsrOP::Csrrw => 0b001,
+            CsrOP::Csrrs => 0b010,
+            CsrOP::Csrrc => 0b011,
+            CsrOP::Csrrwi => 0b101,
+            CsrOP::Csrrsi => 0b110,
+            CsrOP::Csrrci => 0b111,
+        }
+    }
+
+    pub(crate) fn rs1(self, rs: Option<Reg>, zimm: Option<Uimm5>) -> u32 {
+        if self.need_rs() {
+            reg_to_gpr_num(rs.unwrap())
+        } else {
+            zimm.unwrap().as_u32()
+        }
+    }
+}
+
+enum Vxrm {
+    // round-to-nearest-up (add +0.5 LSB)
+    rnu = 0b00,
+    // round-to-nearest-even
+    rne = 0b01,
+    // round-down (truncate)
+    rdn = 0b10,
+    // round-to-odd (OR bits into LSB, aka "jam")
+    rod = 0b11,
+}
+
+impl Vxrm {
+    pub(crate) fn as_u32(self) -> u32 {
+        self as u32
+    }
+}
+
+pub(crate) struct Vcsr {
+    vxrm: Vxrm,
+    // Fixed-point accrued saturation flag.
+    vxsat: bool,
+}
+
+impl Vcsr {
+    pub(crate) fn as_u32(self) -> u32 {
+        // vcsr layout: vxrm in bits 2:1, vxsat in bit 0.
+        (self.vxrm.as_u32() << 1) | (if self.vxsat { 1 } else { 0 })
+    }
+}
+
+/// Atomic memory ordering.
+#[derive(Copy, Clone, Debug)]
+pub enum AMO {
+    Relax = 0b00,
+    Release = 0b01,
+    Aquire = 0b10,
+    SeqCst = 0b11,
+}
+
+impl AMO {
+    pub(crate) fn to_static_str(self) -> &'static str {
+        match self {
+            AMO::Relax => "",
+            AMO::Release => ".rl",
+            AMO::Aquire => ".aq",
+            AMO::SeqCst => ".aqrl",
+        }
+    }
+    pub(crate) fn as_u32(self) -> u32 {
+        self as u32
+    }
+}
+
+impl Inst {
+    /// Fence request bits.
+    pub(crate) const FENCE_REQ_I: u8 = 1 << 3;
+    pub(crate) const FENCE_REQ_O: u8 = 1 << 2;
+    pub(crate) const FENCE_REQ_R: u8 = 1 << 1;
+    pub(crate) const FENCE_REQ_W: u8 = 1 << 0;
+    pub(crate) fn fence_req_to_string(x: u8) -> String {
+        let mut s = String::default();
+        if x & Self::FENCE_REQ_I != 0 {
+            s.push_str("i");
+        }
+        if x & Self::FENCE_REQ_O != 0 {
+            s.push_str("o");
+        }
+        if x & Self::FENCE_REQ_R != 0 {
+            s.push_str("r");
+        }
+        if x & Self::FENCE_REQ_W != 0 {
+            s.push_str("w");
+        }
+        s
+    }
+}
+impl Default for FenceFm {
+    fn default() -> Self {
+        Self::None
+    }
+}
+impl FenceFm {
+    pub(crate) fn as_u32(self) -> u32 {
+        match self {
+            FenceFm::None => 0,
+            FenceFm::Tso => 0b1000,
+        }
+    }
+}
+impl FloatRoundOP {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            FloatRoundOP::Nearest => "nearest",
+            FloatRoundOP::Ceil => "ceil",
+            FloatRoundOP::Floor => "floor",
+            FloatRoundOP::Trunc => "trunc",
+        }
+    }
+
+    pub(crate) fn to_frm(self) -> FRM {
+        match self {
+            FloatRoundOP::Nearest => FRM::RNE,
+            FloatRoundOP::Ceil => FRM::RUP,
+            FloatRoundOP::Floor => FRM::RDN,
+            FloatRoundOP::Trunc => FRM::RTZ,
+        }
+    }
+}
+
+impl FloatSelectOP {
+    pub(crate) fn op_name(self) -> &'static str {
+        match self {
+            FloatSelectOP::Max => "max",
+            FloatSelectOP::Min => "min",
+        }
+    }
+
+    pub(crate) fn to_fpuoprrr(self, ty: Type) -> FpuOPRRR {
+        match self {
+            FloatSelectOP::Max => {
+                if ty == F32 {
+                    FpuOPRRR::FmaxS
+                } else {
+                    FpuOPRRR::FmaxD
+                }
+            }
+            FloatSelectOP::Min => {
+                if ty == F32 {
+                    FpuOPRRR::FminS
+                } else {
+                    FpuOPRRR::FminD
+                }
+            }
+        }
+    }
+    // Move the quiet-NaN bit pattern into an integer register.
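+    // The shift pair below clears the low bits of an all-ones value: for F32
+    // (x = 22) the low 32 bits become 0xffc0_0000, i.e. the sign, exponent
+    // and quiet bits; for F64 (x = 51) the analogous 64-bit mask.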
+    pub(crate) fn snan_bits(self, rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1)));
+        let x = if ty == F32 { 22 } else { 51 };
+        insts.push(Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Srli,
+            rd: rd,
+            rs: rd.to_reg(),
+            imm12: Imm12::from_bits(x),
+        });
+        insts.push(Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Slli,
+            rd: rd,
+            rs: rd.to_reg(),
+            imm12: Imm12::from_bits(x),
+        });
+        insts
+    }
+}
+
+pub(crate) fn f32_bits(f: f32) -> u32 {
+    u32::from_le_bytes(f.to_le_bytes())
+}
+pub(crate) fn f64_bits(f: f64) -> u64 {
+    u64::from_le_bytes(f.to_le_bytes())
+}
+
+/// Exclusive bounds for converting an f32 to an integer: the input must be
+/// strictly between the two returned values to be in range.
+pub(crate) fn f32_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f32, f32) {
+    match (signed, out_bits) {
+        (true, 8) => (i8::min_value() as f32 - 1., i8::max_value() as f32 + 1.),
+        (true, 16) => (i16::min_value() as f32 - 1., i16::max_value() as f32 + 1.),
+        (true, 32) => (-2147483904.0, 2147483648.0),
+        (true, 64) => (-9223373136366403584.0, 9223372036854775808.0),
+        (false, 8) => (-1., u8::max_value() as f32 + 1.),
+        (false, 16) => (-1., u16::max_value() as f32 + 1.),
+        (false, 32) => (-1., 4294967296.0),
+        (false, 64) => (-1., 18446744073709551616.0),
+        _ => unreachable!(),
+    }
+}
+
+/// Exclusive bounds for converting an f64 to an integer; see
+/// `f32_cvt_to_int_bounds`.
+pub(crate) fn f64_cvt_to_int_bounds(signed: bool, out_bits: u8) -> (f64, f64) {
+    match (signed, out_bits) {
+        (true, 8) => (i8::min_value() as f64 - 1., i8::max_value() as f64 + 1.),
+        (true, 16) => (i16::min_value() as f64 - 1., i16::max_value() as f64 + 1.),
+        (true, 32) => (-2147483649.0, 2147483648.0),
+        (true, 64) => (-9223372036854777856.0, 9223372036854775808.0),
+        (false, 8) => (-1., u8::max_value() as f64 + 1.),
+        (false, 16) => (-1., u16::max_value() as f64 + 1.),
+        (false, 32) => (-1., 4294967296.0),
+        (false, 64) => (-1., 18446744073709551616.0),
+        _ => unreachable!(),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::FloatCCArgs;
+    #[test]
+    fn float_cc_bit_clear() {
+        let mut x = FloatCCArgs(FloatCCArgs::UN | FloatCCArgs::GT | FloatCCArgs::EQ);
+        assert!(x.has_and_clear(FloatCCArgs::UN | FloatCCArgs::GT));
+        assert!(x.has(FloatCCArgs::EQ));
+        assert!(!x.has(FloatCCArgs::UN));
+        assert!(!x.has(FloatCCArgs::GT));
+    }
+    #[test]
+    fn float_cc_bit_has() {
+        let x = FloatCCArgs(FloatCCArgs::UN | FloatCCArgs::GT | FloatCCArgs::EQ);
+        assert!(x.has(FloatCCArgs::UN | FloatCCArgs::GT));
+        assert!(!x.has(FloatCCArgs::LT));
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
new file mode 100644
index 000000000000..bbbecb97c282
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -0,0 +1,2920 @@
+//! Riscv64 ISA: binary code emission.
+
+use crate::binemit::StackMap;
+use crate::ir::RelSourceLoc;
+use crate::ir::TrapCode;
+use crate::isa::riscv64::inst::*;
+use crate::isa::riscv64::inst::{zero_reg, AluOPRRR};
+use crate::machinst::{AllocationConsumer, Reg, Writable};
+use regalloc2::Allocation;
+
+pub struct EmitInfo {
+    shared_flag: settings::Flags,
+    isa_flags: super::super::riscv_settings::Flags,
+}
+
+impl EmitInfo {
+    pub(crate) fn new(
+        shared_flag: settings::Flags,
+        isa_flags: super::super::riscv_settings::Flags,
+    ) -> Self {
+        Self {
+            shared_flag,
+            isa_flags,
+        }
+    }
+}
+
+/// Load a constant by placing it directly in the code stream: compute the
+/// current pc with `auipc`, then read the value with a pc-relative load.
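+/// The emitted sequence is, schematically:
+///
+///   auipc rd, 0          ; rd = current pc
+///   lwu/ld rd, 12(rd)    ; load the constant placed 12 bytes ahead
+///   jal x0, <past data>  ; jump over the inline constant
+///   <constant bytes>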
+#[derive(Clone, Copy)]
+pub(crate) enum LoadConstant {
+    U32(u32),
+    U64(u64),
+}
+
+impl LoadConstant {
+    fn to_le_bytes(self) -> Vec<u8> {
+        match self {
+            LoadConstant::U32(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
+            LoadConstant::U64(x) => Vec::from_iter(x.to_le_bytes().into_iter()),
+        }
+    }
+    fn load_op(self) -> LoadOP {
+        match self {
+            LoadConstant::U32(_) => LoadOP::Lwu,
+            LoadConstant::U64(_) => LoadOP::Ld,
+        }
+    }
+    fn load_ty(self) -> Type {
+        match self {
+            LoadConstant::U32(_) => R32,
+            LoadConstant::U64(_) => R64,
+        }
+    }
+
+    pub(crate) fn load_constant(self, rd: Writable<Reg>) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        // get the current pc.
+        insts.push(Inst::Auipc {
+            rd,
+            imm: Imm20 { bits: 0 },
+        });
+        // pc-relative load of the constant.
+        insts.push(Inst::Load {
+            rd,
+            op: self.load_op(),
+            flags: MemFlags::new(),
+            from: AMode::RegOffset(rd.to_reg(), 12, self.load_ty()),
+        });
+        let data = self.to_le_bytes();
+        // jump over the inline data.
+        insts.push(Inst::Jal {
+            dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE + data.len() as i32),
+        });
+        insts.push(Inst::RawData { data });
+        insts
+    }
+
+    // load the constant and perform an extra add.
+    pub(crate) fn load_constant_and_add(self, rd: Writable<Reg>, rs: Reg) -> SmallInstVec<Inst> {
+        let mut insts = self.load_constant(rd);
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Add,
+            rd: rd,
+            rs1: rd.to_reg(),
+            rs2: rs,
+        });
+        insts
+    }
+}
+
+pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
+    u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+    pub(crate) virtual_sp_offset: i64,
+    pub(crate) nominal_sp_to_fp: i64,
+    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+    stack_map: Option<StackMap>,
+    /// Current source-code location corresponding to instruction to be emitted.
+    cur_srcloc: RelSourceLoc,
+}
+
+impl EmitState {
+    fn take_stack_map(&mut self) -> Option<StackMap> {
+        self.stack_map.take()
+    }
+
+    fn clear_post_insn(&mut self) {
+        self.stack_map = None;
+    }
+
+    fn cur_srcloc(&self) -> RelSourceLoc {
+        self.cur_srcloc
+    }
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+    fn new(abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>) -> Self {
+        EmitState {
+            virtual_sp_offset: 0,
+            nominal_sp_to_fp: abi.frame_size() as i64,
+            stack_map: None,
+            cur_srcloc: RelSourceLoc::default(),
+        }
+    }
+
+    fn pre_safepoint(&mut self, stack_map: StackMap) {
+        self.stack_map = Some(stack_map);
+    }
+
+    fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) {
+        self.cur_srcloc = srcloc;
+    }
+}
+
+impl Inst {
+    /// Construct an "imm - rs" sequence.
+    pub(crate) fn construct_imm_sub_rs(rd: Writable<Reg>, imm: u64, rs: Reg) -> SmallInstVec<Inst> {
+        let mut insts = Inst::load_constant_u64(rd, imm);
+        insts.push(Inst::AluRRR {
+            alu_op: AluOPRRR::Sub,
+            rd,
+            rs1: rd.to_reg(),
+            rs2: rs,
+        });
+        insts
+    }
+
+    /// Load a mask with the low `ty.bits()` bits set into `rd`.
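+    /// For example: I8 -> 0xff, I16 -> 0xffff, I32 -> 0xffff_ffff, and
+    /// I64 -> all ones (i.e. -1).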
+    pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        assert!(ty.is_int() && ty.bits() <= 64);
+        match ty {
+            I64 => {
+                insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1)));
+            }
+            I32 | I16 => {
+                insts.push(Inst::load_imm12(rd, Imm12::from_bits(-1)));
+                insts.push(Inst::Extend {
+                    rd: rd,
+                    rn: rd.to_reg(),
+                    signed: false,
+                    from_bits: ty.bits() as u8,
+                    to_bits: 64,
+                });
+            }
+            I8 => {
+                insts.push(Inst::load_imm12(rd, Imm12::from_bits(255)));
+            }
+            _ => unreachable!("ty:{:?}", ty),
+        }
+        insts
+    }
+    /// Invert all bits.
+    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Xori,
+            rd,
+            rs,
+            imm12: Imm12::from_bits(-1),
+        }
+    }
+
+    // Emit a check that the float is not a NaN: `feq rs, rs` is 1 iff rs is
+    // not NaN.
+    pub(crate) fn emit_not_nan(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst {
+        Inst::FpuRRR {
+            alu_op: if ty == F32 {
+                FpuOPRRR::FeqS
+            } else {
+                FpuOPRRR::FeqD
+            },
+            frm: None,
+            rd: rd,
+            rs1: rs,
+            rs2: rs,
+        }
+    }
+
+    pub(crate) fn emit_fabs(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst {
+        Inst::FpuRRR {
+            alu_op: if ty == F32 {
+                FpuOPRRR::FsgnjxS
+            } else {
+                FpuOPRRR::FsgnjxD
+            },
+            frm: None,
+            rd: rd,
+            rs1: rs,
+            rs2: rs,
+        }
+    }
+    /// Branch to `taken` if the float is not zero.
+    pub(crate) fn emit_if_float_not_zero(
+        tmp: Writable<Reg>,
+        rs: Reg,
+        ty: Type,
+        taken: BranchTarget,
+        not_taken: BranchTarget,
+    ) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        let class_op = if ty == F32 {
+            FpuOPRR::FclassS
+        } else {
+            FpuOPRR::FclassD
+        };
+        insts.push(Inst::FpuRR {
+            alu_op: class_op,
+            frm: None,
+            rd: tmp,
+            rs: rs,
+        });
+        insts.push(Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Andi,
+            rd: tmp,
+            rs: tmp.to_reg(),
+            imm12: Imm12::from_bits(FClassResult::is_zero_bits() as i16),
+        });
+        insts.push(Inst::CondBr {
+            taken,
+            not_taken,
+            kind: IntegerCompare {
+                kind: IntCC::Equal,
+                rs1: tmp.to_reg(),
+                rs2: zero_reg(),
+            },
+        });
+        insts
+    }
+    pub(crate) fn emit_fneg(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst {
+        Inst::FpuRRR {
+            alu_op: if ty == F32 {
+                FpuOPRRR::FsgnjnS
+            } else {
+                FpuOPRRR::FsgnjnD
+            },
+            frm: None,
+            rd: rd,
+            rs1: rs,
+            rs2: rs,
+        }
+    }
+
+    /// Lower an fcmp-and-branch by testing the needed condition bits one at
+    /// a time.
+    pub(crate) fn lower_br_fcmp(
+        cc: FloatCC,
+        x: Reg,
+        y: Reg,
+        taken: BranchTarget,
+        not_taken: BranchTarget,
+        ty: Type,
+        tmp: Writable<Reg>,
+    ) -> SmallInstVec<Inst> {
+        assert!(tmp.to_reg().class() == RegClass::Int);
+        let mut insts = SmallInstVec::new();
+        let mut cc_args = FloatCCArgs::from_floatcc(cc);
+        let eq_op = if ty == F32 {
+            FpuOPRRR::FeqS
+        } else {
+            FpuOPRRR::FeqD
+        };
+        let lt_op = if ty == F32 {
+            FpuOPRRR::FltS
+        } else {
+            FpuOPRRR::FltD
+        };
+        let le_op = if ty == F32 {
+            FpuOPRRR::FleS
+        } else {
+            FpuOPRRR::FleD
+        };
+
+        // >= (GT|EQ)
+        if cc_args.has_and_clear(FloatCCArgs::GT | FloatCCArgs::EQ) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: le_op,
+                rd: tmp,
+                rs1: y, // x and y order reversed.
+                rs2: x,
+            });
+            insts.push(Inst::CondBr {
+                taken: taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::NotEqual,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+
+        // <= (LT|EQ)
+        if cc_args.has_and_clear(FloatCCArgs::LT | FloatCCArgs::EQ) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: le_op,
+                rd: tmp,
+                rs1: x,
+                rs2: y,
+            });
+            insts.push(Inst::CondBr {
+                taken: taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::NotEqual,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+
+        // if eq
+        if cc_args.has_and_clear(FloatCCArgs::EQ) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: eq_op,
+                rd: tmp,
+                rs1: x,
+                rs2: y,
+            });
+            insts.push(Inst::CondBr {
+                taken: taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::NotEqual,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+        // if ne
+        if cc_args.has_and_clear(FloatCCArgs::NE) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: eq_op,
+                rd: tmp,
+                rs1: x,
+                rs2: y,
+            });
+            insts.push(Inst::CondBr {
+                taken: taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::Equal,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+
+        // if <
+        if cc_args.has_and_clear(FloatCCArgs::LT) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: lt_op,
+                rd: tmp,
+                rs1: x,
+                rs2: y,
+            });
+            insts.push(Inst::CondBr {
+                taken: taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::NotEqual,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+        // if >
+        if cc_args.has_and_clear(FloatCCArgs::GT) {
+            insts.push(Inst::FpuRRR {
+                frm: None,
+                alu_op: lt_op,
+                rd: tmp,
+                rs1: y, // x and y order reversed.
+                rs2: x,
+            });
+            insts.push(Inst::CondBr {
+                taken,
+                not_taken: BranchTarget::zero(),
+                kind: IntegerCompare {
+                    kind: IntCC::NotEqual,
+                    rs1: tmp.to_reg(),
+                    rs2: zero_reg(),
+                },
+            });
+        }
+        // if unordered
+        if cc_args.has_and_clear(FloatCCArgs::UN) {
+            insts.extend(Inst::lower_float_unordered(tmp, ty, x, y, taken, not_taken));
+        } else {
+            // no condition matched: jump to the not_taken target.
+            insts.push(Inst::Jal { dest: not_taken });
+        }
+        // make sure we handled all the condition bits.
+        assert!(cc_args.0 == 0);
+        insts
+    }
+    pub(crate) fn lower_br_icmp(
+        cc: IntCC,
+        a: ValueRegs<Reg>,
+        b: ValueRegs<Reg>,
+        taken: BranchTarget,
+        not_taken: BranchTarget,
+        ty: Type,
+    ) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        if ty.bits() <= 64 {
+            let rs1 = a.only_reg().unwrap();
+            let rs2 = b.only_reg().unwrap();
+            let inst = Inst::CondBr {
+                taken,
+                not_taken,
+                kind: IntegerCompare { kind: cc, rs1, rs2 },
+            };
+            insts.push(inst);
+            return insts;
+        }
+        // compare i128 values via their low and high halves.
+        let low = |cc: IntCC| -> IntegerCompare {
+            IntegerCompare {
+                rs1: a.regs()[0],
+                rs2: b.regs()[0],
+                kind: cc,
+            }
+        };
+        let high = |cc: IntCC| -> IntegerCompare {
+            IntegerCompare {
+                rs1: a.regs()[1],
+                rs2: b.regs()[1],
+                kind: cc,
+            }
+        };
+        match cc {
+            IntCC::Equal => {
+                // if the high parts are not equal, go to not_taken;
+                // otherwise fall through.
+                insts.push(Inst::CondBr {
+                    taken: not_taken,
+                    not_taken: BranchTarget::zero(),
+                    kind: high(IntCC::NotEqual),
+                });
+                // the high parts are equal: the low parts decide.
+                insts.push(Inst::CondBr {
+                    taken,
+                    not_taken,
+                    kind: low(IntCC::Equal),
+                });
+            }
+
+            IntCC::NotEqual => {
+                // if the high parts are not equal, the whole values must be
+                // not equal, so go straight to taken; otherwise fall through.
+                insts.push(Inst::CondBr {
+                    taken,
+                    not_taken: BranchTarget::zero(), // no branch
+                    kind: high(IntCC::NotEqual),
+                });
+
+                insts.push(Inst::CondBr {
+                    taken,
+                    not_taken,
+                    kind: low(IntCC::NotEqual),
+                });
+            }
+            IntCC::SignedGreaterThanOrEqual
+            | IntCC::SignedLessThanOrEqual
+            | IntCC::UnsignedGreaterThanOrEqual
+            | IntCC::UnsignedLessThanOrEqual
+            | IntCC::SignedGreaterThan
+            | IntCC::SignedLessThan
+            | IntCC::UnsignedLessThan
+            | IntCC::UnsignedGreaterThan => {
+                // if the high parts strictly satisfy the condition: taken.
+                insts.push(Inst::CondBr {
+                    taken,
+                    not_taken: BranchTarget::zero(),
+                    kind: high(cc.without_equal()),
+                });
+                // the high parts differ without satisfying it: not taken.
+                insts.push(Inst::CondBr {
+                    taken: not_taken,
+                    not_taken: BranchTarget::zero(),
+                    kind: high(IntCC::NotEqual),
+                });
+                // the high parts are equal: the unsigned comparison of the
+                // low parts decides.
+                insts.push(Inst::CondBr {
+                    taken,
+                    not_taken,
+                    kind: low(cc.unsigned()),
+                });
+            }
+        }
+        insts
+    }
+
+    /// Check whether a float comparison is unordered (either input is NaN).
+    pub(crate) fn lower_float_unordered(
+        tmp: Writable<Reg>,
+        ty: Type,
+        x: Reg,
+        y: Reg,
+        taken: BranchTarget,
+        not_taken: BranchTarget,
+    ) -> SmallInstVec<Inst> {
+        let mut insts = SmallInstVec::new();
+        let class_op = if ty == F32 {
+            FpuOPRR::FclassS
+        } else {
+            FpuOPRR::FclassD
+        };
+        // branch to taken if x is NaN.
+        insts.push(Inst::FpuRR {
+            frm: None,
+            alu_op: class_op,
+            rd: tmp,
+            rs: x,
+        });
+        insts.push(Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Andi,
+            rd: tmp,
+            rs: tmp.to_reg(),
+            imm12: Imm12::from_bits(FClassResult::is_nan_bits() as i16),
+        });
+        insts.push(Inst::CondBr {
+            taken,
+            not_taken: BranchTarget::zero(),
+            kind: IntegerCompare {
+                kind: IntCC::NotEqual,
+                rs1: tmp.to_reg(),
+                rs2: zero_reg(),
+            },
+        });
+        // branch to taken if y is NaN.
+        insts.push(Inst::FpuRR {
+            frm: None,
+            alu_op: class_op,
+            rd: tmp,
+            rs: y,
+        });
+        insts.push(Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Andi,
+            rd: tmp,
+            rs: tmp.to_reg(),
+            imm12: Imm12::from_bits(FClassResult::is_nan_bits() as i16),
+        });
+        insts.push(Inst::CondBr {
+            taken,
+            not_taken,
+            kind: IntegerCompare {
+                kind: IntCC::NotEqual,
+                rs1: tmp.to_reg(),
+                rs2: zero_reg(),
+            },
+        });
+        insts
+    }
+}
+
+impl MachInstEmit for Inst {
+    type State = EmitState;
+    type Info = EmitInfo;
+
+    fn emit(
+        &self,
+        allocs: &[Allocation],
+        sink: &mut MachBuffer<Inst>,
+        emit_info: &Self::Info,
+        state: &mut EmitState,
+    ) {
+        let mut allocs = AllocationConsumer::new(allocs);
+        // N.B.: we *must* not exceed the "worst-case size" used to compute
+        // where to insert islands, except when islands are explicitly triggered
+        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+        // to allow disabling the check for `JTSequence`, which is always
+        // emitted following an `EmitIsland`.
+        let mut start_off = sink.cur_offset();
+        match self {
+            &Inst::Nop0 => {
+                // do nothing
+            }
+            // Addi x0, x0, 0
+            &Inst::Nop4 => {
+                let x = Inst::AluRRImm12 {
+                    alu_op: AluOPRRI::Addi,
+                    rd: Writable::from_reg(zero_reg()),
+                    rs: zero_reg(),
+                    imm12: Imm12::zero(),
+                };
+                x.emit(&[], sink, emit_info, state)
+            }
+            &Inst::RawData { ref data } => {
+                // emit an island if needed; for now the data is never very long.
+                let length = data.len() as CodeOffset;
+                if sink.island_needed(length) {
+                    sink.emit_island(length);
+                }
+                sink.put_data(&data[..]);
+                // safe to disable the worst-case-size check here:
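+                // resetting `start_off` makes the debug check below measure
+                // only the code emitted after the island/data.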
+ start_off = sink.cur_offset(); + } + &Inst::Lui { rd, ref imm } => { + let rd = allocs.next_writable(rd); + let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.as_u32() << 12); + sink.put4(x); + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.rs2_funct5() << 20 + | alu_op.funct7() << 25; + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && alu_op.is_convert_to_int() { + sink.add_trap(TrapCode::BadConversionToInteger); + } + sink.put4(x); + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rs3 = allocs.next(rs3); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3(frm) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct2() << 25 + | reg_to_gpr_num(rs3) << 27; + + sink.put4(x); + } + &Inst::FpuRRR { + alu_op, + frm, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3(frm)) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); + } + &Inst::Unwind { ref inst } => { + sink.add_unwind(inst.clone()); + } + &Inst::DummyUse { reg } => { + allocs.next(reg); + } + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let (rs1, rs2) = if alu_op.reverse_rs() { + (rs2, rs1) + } else { + (rs1, rs2) + }; + + let x: u32 = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | (alu_op.funct3()) << 12 + | reg_to_gpr_num(rs1) << 15 + | reg_to_gpr_num(rs2) << 20 + | alu_op.funct7() << 25; + sink.put4(x); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + imm12, + } => { + let rs = allocs.next(rs); + let rd = allocs.next_writable(rd); + let x = alu_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | alu_op.funct3() << 12 + | reg_to_gpr_num(rs) << 15 + | alu_op.imm12(imm12) << 20; + sink.put4(x); + } + &Inst::Load { + rd, + op, + from, + flags, + } => { + let x; + let base = from.get_base_register(); + let base = allocs.next(base); + let rd = allocs.next_writable(rd); + let offset = from.get_offset_with_state(state); + if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) { + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(TrapCode::HeapOutOfBounds); + } + x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(base) << 15 + | (imm12.as_u32()) << 20; + sink.put4(x); + } else { + let tmp = writable_spilltmp_reg(); + let mut insts = + LoadConstant::U64(offset as u64).load_constant_and_add(tmp, base); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
+                        sink.add_trap(TrapCode::HeapOutOfBounds);
+                    }
+                    insts.push(Inst::Load {
+                        op,
+                        from: AMode::RegOffset(tmp.to_reg(), 0, I64),
+                        rd,
+                        flags,
+                    });
+                    insts
+                        .into_iter()
+                        .for_each(|inst| inst.emit(&[], sink, emit_info, state));
+                }
+            }
+            &Inst::Store { op, src, flags, to } => {
+                let base = allocs.next(to.get_base_register());
+                let src = allocs.next(src);
+                let offset = to.get_offset_with_state(state);
+                let x;
+                if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
+                    let srcloc = state.cur_srcloc();
+                    if !srcloc.is_default() && !flags.notrap() {
+                        // Register the offset at which the actual store instruction starts.
+                        sink.add_trap(TrapCode::HeapOutOfBounds);
+                    }
+                    x = op.op_code()
+                        | (imm12.as_u32() & 0x1f) << 7
+                        | op.funct3() << 12
+                        | reg_to_gpr_num(base) << 15
+                        | reg_to_gpr_num(src) << 20
+                        | (imm12.as_u32() >> 5) << 25;
+                    sink.put4(x);
+                } else {
+                    let tmp = writable_spilltmp_reg();
+                    let mut insts =
+                        LoadConstant::U64(offset as u64).load_constant_and_add(tmp, base);
+                    let srcloc = state.cur_srcloc();
+                    if !srcloc.is_default() && !flags.notrap() {
+                        // Register the offset at which the actual store instruction starts.
+                        sink.add_trap(TrapCode::HeapOutOfBounds);
+                    }
+                    insts.push(Inst::Store {
+                        op,
+                        to: AMode::RegOffset(tmp.to_reg(), 0, I64),
+                        flags,
+                        src,
+                    });
+                    insts
+                        .into_iter()
+                        .for_each(|inst| inst.emit(&[], sink, emit_info, state));
+                }
+            }
+
+            &Inst::ReferenceCheck { rd, op, x } => {
+                let x = allocs.next(x);
+                let rd = allocs.next_writable(rd);
+                let mut insts = SmallInstVec::new();
+                match op {
+                    ReferenceCheckOP::IsNull => {
+                        insts.push(Inst::CondBr {
+                            taken: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE * 3),
+                            not_taken: BranchTarget::zero(),
+                            kind: IntegerCompare {
+                                kind: IntCC::Equal,
+                                rs1: zero_reg(),
+                                rs2: x,
+                            },
+                        });
+                        // here is false
+                        insts.push(Inst::load_imm12(rd, Imm12::FALSE));
+                        insts.push(Inst::Jal {
+                            dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE * 2),
+                        });
+                        // here is true
+                        insts.push(Inst::load_imm12(rd, Imm12::TRUE));
+                    }
+
+                    ReferenceCheckOP::IsInvalid => {
+                        // TODO: for now this only checks for null;
+                        // should a null reference really be considered valid?
+                        insts.push(Inst::CondBr {
+                            taken: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE * 3),
+                            not_taken: BranchTarget::zero(),
+                            kind: IntegerCompare {
+                                kind: IntCC::Equal,
+                                rs1: zero_reg(),
+                                rs2: x,
+                            },
+                        });
+                        // here is false
+                        insts.push(Inst::load_imm12(rd, Imm12::FALSE));
+                        insts.push(Inst::Jal {
+                            dest: BranchTarget::ResolvedOffset(Inst::INSTRUCTION_SIZE * 2),
+                        });
+                        // here is true
+                        insts.push(Inst::load_imm12(rd, Imm12::TRUE));
+                    }
+                }
+
+                insts
+                    .into_iter()
+                    .for_each(|i| i.emit(&[], sink, emit_info, state));
+            }
+            &Inst::Args { .. } => {
+                // Nothing: this is a pseudoinstruction that serves
+                // only to constrain registers at a certain point.
+            }
+            &Inst::Ret { .. } => {
+                // jalr x0, x1, 0
+                let x: u32 = (0b1100111) | (1 << 15);
+                sink.put4(x);
+            }
+
+            &Inst::Extend {
+                rd,
+                rn,
+                signed,
+                from_bits,
+                to_bits: _to_bits,
+            } => {
+                let rn = allocs.next(rn);
+                let rd = allocs.next_writable(rd);
+                let mut insts = SmallInstVec::new();
+                let shift_bits = (64 - from_bits) as i16;
+                let is_u8 = || from_bits == 8 && !signed;
+                if is_u8() {
+                    // special case for u8:
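+                    // a single `andi` with 0xff zero-extends a byte; every
+                    // other width uses the shift pair in the else branch.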
+ insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd, + rs: rn, + imm12: Imm12::from_bits(255), + }); + } else { + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd, + rs: rn, + imm12: Imm12::from_bits(shift_bits), + }); + insts.push(Inst::AluRRImm12 { + alu_op: if signed { + AluOPRRI::Srai + } else { + AluOPRRI::Srli + }, + rd, + rs: rd.to_reg(), + imm12: Imm12::from_bits(shift_bits), + }); + } + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + &Inst::AjustSp { amount } => { + if let Some(imm) = Imm12::maybe_from_u64(amount as u64) { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_stack_reg(), + rs: stack_reg(), + imm12: imm, + } + .emit(&[], sink, emit_info, state); + } else { + let tmp = writable_spilltmp_reg(); + let mut insts = Inst::load_constant_u64(tmp, amount as u64); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: tmp.to_reg(), + rs2: stack_reg(), + }); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } + &Inst::Call { ref info } => { + // call + match info.dest { + ExternalName::User { .. } => { + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + sink.add_reloc(Reloc::RiscvCall, &info.dest, 0); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s); + } + Inst::construct_auipc_and_jalr( + Some(writable_link_reg()), + writable_link_reg(), + 0, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + ExternalName::LibCall(..) + | ExternalName::TestCase { .. } + | ExternalName::KnownSymbol(..) => { + // use indirect call. it is more simple. + // load ext name. + Inst::LoadExtName { + rd: writable_spilltmp_reg2(), + name: Box::new(info.dest.clone()), + offset: 0, + } + .emit(&[], sink, emit_info, state); + + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + // call + Inst::Jalr { + rd: writable_link_reg(), + base: spilltmp_reg2(), + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + } + } + &Inst::CallInd { ref info } => { + let rn = allocs.next(info.rn); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + + if info.opcode.is_call() { + sink.add_call_site(info.opcode); + } + Inst::Jalr { + rd: writable_link_reg(), + base: rn, + offset: Imm12::zero(), + } + .emit(&[], sink, emit_info, state); + } + + &Inst::Jal { dest } => { + let code: u32 = 0b1101111; + match dest { + BranchTarget::Label(lable) => { + sink.use_label_at_offset(start_off, lable, LabelUse::Jal20); + sink.add_uncond_branch(start_off, start_off + 4, lable); + sink.put4(code); + } + BranchTarget::ResolvedOffset(offset) => { + let offset = offset as i64; + if offset != 0 { + if LabelUse::Jal20.offset_in_range(offset) { + let mut code = code.to_le_bytes(); + LabelUse::Jal20.patch_raw_offset(&mut code, offset); + sink.put_data(&code[..]); + } else { + Inst::construct_auipc_and_jalr( + None, + writable_spilltmp_reg(), + offset, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } else { + // CondBr often generate Jal {dest : 0}, means otherwise no jump. 
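+                            // (a resolved offset of zero therefore emits
+                            // nothing and simply falls through.)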
+                    }
+                }
+            }
+            &Inst::CondBr {
+                taken,
+                not_taken,
+                kind,
+            } => {
+                let mut kind = kind;
+                kind.rs1 = allocs.next(kind.rs1);
+                kind.rs2 = allocs.next(kind.rs2);
+                match taken {
+                    BranchTarget::Label(label) => {
+                        let code = kind.emit();
+                        let code_inverse = kind.inverse().emit().to_le_bytes();
+                        sink.use_label_at_offset(start_off, label, LabelUse::B12);
+                        sink.add_cond_branch(start_off, start_off + 4, label, &code_inverse);
+                        sink.put4(code);
+                    }
+                    BranchTarget::ResolvedOffset(offset) => {
+                        assert!(offset != 0);
+                        if LabelUse::B12.offset_in_range(offset as i64) {
+                            let code = kind.emit();
+                            let mut code = code.to_le_bytes();
+                            LabelUse::B12.patch_raw_offset(&mut code, offset as i64);
+                            sink.put_data(&code[..])
+                        } else {
+                            let mut code = kind.emit().to_le_bytes();
+                            // jump over the condbr, 4 bytes.
+                            LabelUse::B12.patch_raw_offset(&mut code[..], 4);
+                            sink.put_data(&code[..]);
+                            Inst::construct_auipc_and_jalr(
+                                None,
+                                writable_spilltmp_reg(),
+                                offset as i64,
+                            )
+                            .into_iter()
+                            .for_each(|i| i.emit(&[], sink, emit_info, state));
+                        }
+                    }
+                }
+                Inst::Jal { dest: not_taken }.emit(&[], sink, emit_info, state);
+            }
+
+            &Inst::Mov { rd, rm, ty } => {
+                if rd.to_reg() != rm {
+                    let rm = allocs.next(rm);
+                    let rd = allocs.next_writable(rd);
+                    if ty.is_float() {
+                        Inst::FpuRRR {
+                            alu_op: if ty == F32 {
+                                FpuOPRRR::FsgnjS
+                            } else {
+                                FpuOPRRR::FsgnjD
+                            },
+                            frm: None,
+                            rd: rd,
+                            rs1: rm,
+                            rs2: rm,
+                        }
+                        .emit(&[], sink, emit_info, state);
+                    } else {
+                        let x = Inst::AluRRImm12 {
+                            alu_op: AluOPRRI::Ori,
+                            rd: rd,
+                            rs: rm,
+                            imm12: Imm12::zero(),
+                        };
+                        x.emit(&[], sink, emit_info, state);
+                    }
+                }
+            }
+            &Inst::BrTableCheck {
+                index,
+                targets_len,
+                default_,
+            } => {
+                let index = allocs.next(index);
+                // load the table length.
+                Inst::load_constant_u32(writable_spilltmp_reg(), targets_len as u64)
+                    .iter()
+                    .for_each(|i| i.emit(&[], sink, emit_info, state));
+                Inst::CondBr {
+                    taken: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 3),
+                    not_taken: BranchTarget::zero(),
+                    kind: IntegerCompare {
+                        kind: IntCC::UnsignedLessThan,
+                        rs1: index,
+                        rs2: spilltmp_reg(),
+                    },
+                }
+                .emit(&[], sink, emit_info, state);
+                sink.use_label_at_offset(
+                    sink.cur_offset(),
+                    default_.as_label().unwrap(),
+                    LabelUse::PCRel32,
+                );
+                Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
+                    .iter()
+                    .for_each(|i| i.emit(&[], sink, emit_info, state));
+            }
+            &Inst::BrTable {
+                index,
+                tmp1,
+                ref targets,
+            } => {
+                let index = allocs.next(index);
+                let tmp1 = allocs.next_writable(tmp1);
+                let mut insts = SmallInstVec::new();
+                // get the current pc.
+                insts.push(Inst::Auipc {
+                    rd: tmp1,
+                    imm: Imm20::from_bits(0),
+                });
+                // t *= 8; every jump emitted below is 8 bytes.
+                insts.push(Inst::AluRRImm12 {
+                    alu_op: AluOPRRI::Slli,
+                    rd: writable_spilltmp_reg(),
+                    rs: index,
+                    imm12: Imm12::from_bits(3),
+                });
+                // tmp1 += t
+                insts.push(Inst::AluRRR {
+                    alu_op: AluOPRRR::Add,
+                    rd: tmp1,
+                    rs1: tmp1.to_reg(),
+                    rs2: spilltmp_reg(),
+                });
+                insts.push(Inst::Jalr {
+                    rd: writable_zero_reg(),
+                    base: tmp1.to_reg(),
+                    offset: Imm12::from_bits(16),
+                });
+
+                // here are all the jumps.
+                let mut need_label_use = vec![];
+                for t in targets {
+                    need_label_use.push((insts.len(), t.clone()));
+                    insts.extend(Inst::construct_auipc_and_jalr(
+                        None,
+                        writable_spilltmp_reg(),
+                        0,
+                    ));
+                }
+                // emit an island if needed.
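+                // each queued Inst encodes to exactly 4 bytes, so this is the
+                // total size of the sequence built above.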
+ let distance = (insts.len() * 4) as u32; + if sink.island_needed(distance) { + sink.emit_island(distance); + } + let mut need_label_use = &need_label_use[..]; + insts.into_iter().enumerate().for_each(|(index, inst)| { + if !need_label_use.is_empty() && need_label_use[0].0 == index { + sink.use_label_at_offset( + sink.cur_offset(), + need_label_use[0].1.as_label().unwrap(), + LabelUse::PCRel32, + ); + need_label_use = &need_label_use[1..]; + } + inst.emit(&[], sink, emit_info, state); + }); + // emit the island before, so we can safely + // disable the worst-case-size check in this case. + start_off = sink.cur_offset(); + } + + &Inst::VirtualSPOffsetAdj { amount } => { + log::trace!( + "virtual sp offset adjusted by {} -> {}", + amount, + state.virtual_sp_offset + amount + ); + state.virtual_sp_offset += amount; + } + &Inst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let addr = allocs.next(addr); + let src = allocs.next(src); + let rd = allocs.next_writable(rd); + let srcloc = state.cur_srcloc(); + if !srcloc.is_default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + let x = op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | op.funct3() << 12 + | reg_to_gpr_num(addr) << 15 + | reg_to_gpr_num(src) << 20 + | op.funct7(amo) << 25; + + sink.put4(x); + } + &Inst::Fence { pred, succ } => { + let x = 0b0001111 + | 0b00000 << 7 + | 0b000 << 12 + | 0b00000 << 15 + | (succ as u32) << 20 + | (pred as u32) << 24; + + sink.put4(x); + } + &Inst::FenceI => sink.put4(0x0000100f), + &Inst::Auipc { rd, imm } => { + let rd = allocs.next_writable(rd); + let x = enc_auipc(rd, imm); + sink.put4(x); + } + + &Inst::LoadAddr { rd, mem } => { + let base = mem.get_base_register(); + let base = allocs.next(base); + let rd = allocs.next_writable(rd); + let offset = mem.get_offset_with_state(state); + if let Some(offset) = Imm12::maybe_from_u64(offset as u64) { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: rd, + rs: base, + imm12: offset, + } + .emit(&[], sink, emit_info, state); + } else { + let insts = LoadConstant::U64(offset as u64).load_constant_and_add(rd, base); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + } + + &Inst::Fcmp { + rd, + cc, + ty, + rs1, + rs2, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + Inst::lower_br_fcmp( + cc, + rs1, + rs2, + BranchTarget::Label(label_true), + BranchTarget::zero(), + ty, + rd, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here is not taken. + Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); + // jump over. 
+ Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is true + sink.bind_label(label_true); + Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over); + } + + &Inst::Select { + ref dst, + condition, + ref x, + ref y, + ty: _ty, + } => { + let condition = allocs.next(condition); + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let dst: Vec<_> = dst + .clone() + .into_iter() + .map(|r| allocs.next_writable(r)) + .collect(); + + let mut insts = SmallInstVec::new(); + let label_false = sink.get_label(); + insts.push(Inst::CondBr { + taken: BranchTarget::Label(label_false), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: condition, + rs2: zero_reg(), + }, + }); + // here is the true + // select the first value + insts.extend(gen_moves(&dst[..], x.regs())); + let label_jump_over = sink.get_label(); + insts.push(Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + }); + // here is false + insts + .drain(..) + .for_each(|i: Inst| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_false); + // select second value1 + insts.extend(gen_moves(&dst[..], y.regs())); + insts + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over); + } + &Inst::Jalr { rd, base, offset } => { + let rd = allocs.next_writable(rd); + let x = enc_jalr(rd, base, offset); + sink.put4(x); + } + &Inst::ECall => { + sink.put4(0x00000073); + } + &Inst::EBreak => { + sink.put4(0x00100073); + } + &Inst::Icmp { + cc, + rd, + ref a, + ref b, + ty, + } => { + let a = alloc_value_regs(a, &mut allocs); + let b = alloc_value_regs(b, &mut allocs); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_false = sink.get_label(); + Inst::lower_br_icmp( + cc, + a, + b, + BranchTarget::Label(label_true), + BranchTarget::Label(label_false), + ty, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + sink.bind_label(label_true); + Inst::load_imm12(rd, Imm12::from_bits(-1)).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::offset(Inst::INSTRUCTION_SIZE * 2), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_false); + Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state); + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = allocs.next(offset); + let e = allocs.next(e); + let addr = allocs.next(addr); + let v = allocs.next(v); + let t0 = allocs.next_writable(t0); + let dst = allocs.next_writable(dst); + + // # addr holds address of memory location + // # e holds expected value + // # v holds desired value + // # dst holds return value + // cas: + // lr.w dst, (addr) # Load original value. + // bne dst, e, fail # Doesn’t match, so fail. + // sc.w t0, v, (addr) # Try to update. + // bnez t0 , cas # if store not ok,retry. 
+                // fail:
+                let fail_label = sink.get_label();
+                let cas_label = sink.get_label();
+                sink.bind_label(cas_label);
+                Inst::Atomic {
+                    op: AtomicOP::load_op(ty),
+                    rd: dst,
+                    addr,
+                    src: zero_reg(),
+                    amo: AMO::SeqCst,
+                }
+                .emit(&[], sink, emit_info, state);
+                let origin_value = if ty.bits() < 32 {
+                    AtomicOP::extract(t0, offset, dst.to_reg(), ty)
+                        .iter()
+                        .for_each(|i| i.emit(&[], sink, emit_info, state));
+                    t0.to_reg()
+                } else if ty.bits() == 32 {
+                    Inst::Extend {
+                        rd: t0,
+                        rn: dst.to_reg(),
+                        signed: false,
+                        from_bits: 32,
+                        to_bits: 64,
+                    }
+                    .emit(&[], sink, emit_info, state);
+                    t0.to_reg()
+                } else {
+                    dst.to_reg()
+                };
+                Inst::CondBr {
+                    taken: BranchTarget::Label(fail_label),
+                    not_taken: BranchTarget::zero(),
+                    kind: IntegerCompare {
+                        kind: IntCC::NotEqual,
+                        rs1: e,
+                        rs2: origin_value,
+                    },
+                }
+                .emit(&[], sink, emit_info, state);
+                let store_value = if ty.bits() < 32 {
+                    // reload the value into t0.
+                    Inst::Atomic {
+                        op: AtomicOP::load_op(ty),
+                        rd: t0,
+                        addr,
+                        src: zero_reg(),
+                        amo: AMO::SeqCst,
+                    }
+                    .emit(&[], sink, emit_info, state);
+                    // merge the new subword value into the loaded word.
+                    AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
+                        .iter()
+                        .for_each(|i| i.emit(&[], sink, emit_info, state));
+                    t0.to_reg()
+                } else {
+                    v
+                };
+                Inst::Atomic {
+                    op: AtomicOP::store_op(ty),
+                    rd: t0,
+                    addr,
+                    src: store_value,
+                    amo: AMO::SeqCst,
+                }
+                .emit(&[], sink, emit_info, state);
+                // retry if the store-conditional failed (sc wrote a nonzero
+                // code into t0).
+                Inst::CondBr {
+                    taken: BranchTarget::Label(cas_label),
+                    not_taken: BranchTarget::zero(),
+                    kind: IntegerCompare {
+                        kind: IntCC::NotEqual,
+                        rs1: t0.to_reg(),
+                        rs2: zero_reg(),
+                    },
+                }
+                .emit(&[], sink, emit_info, state);
+                sink.bind_label(fail_label);
+            }
+            &Inst::AtomicRmwLoop {
+                offset,
+                op,
+                dst,
+                ty,
+                p,
+                x,
+                t0,
+            } => {
+                let offset = allocs.next(offset);
+                let p = allocs.next(p);
+                let x = allocs.next(x);
+                let t0 = allocs.next_writable(t0);
+                let dst = allocs.next_writable(dst);
+                let retry = sink.get_label();
+                sink.bind_label(retry);
+                // load the old value.
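+                // lr.{w,d} dst, (p), with aqrl ordering; the loop below
+                // retries until the matching sc succeeds.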
+ Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: dst, + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + // + + let store_value: Reg = match op { + crate::ir::AtomicRmwOp::Add + | crate::ir::AtomicRmwOp::Sub + | crate::ir::AtomicRmwOp::And + | crate::ir::AtomicRmwOp::Or + | crate::ir::AtomicRmwOp::Xor => { + AtomicOP::extract(t0, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::AluRRR { + alu_op: match op { + crate::ir::AtomicRmwOp::Add => AluOPRRR::Add, + crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub, + crate::ir::AtomicRmwOp::And => AluOPRRR::And, + crate::ir::AtomicRmwOp::Or => AluOPRRR::Or, + crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor, + _ => unreachable!(), + }, + rd: t0, + rs1: t0.to_reg(), + rs2: x, + } + .emit(&[], sink, emit_info, state); + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } + crate::ir::AtomicRmwOp::Nand => { + let x2 = if ty.bits() < 32 { + AtomicOP::extract(t0, offset, dst.to_reg(), ty) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + t0.to_reg() + } else { + dst.to_reg() + }; + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: t0, + rs1: x, + rs2: x2, + } + .emit(&[], sink, emit_info, state); + Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state); + if ty.bits() < 32 { + Inst::Atomic { + op: AtomicOP::load_op(ty), + rd: writable_spilltmp_reg2(), + addr: p, + src: zero_reg(), + amo: AMO::SeqCst, + } + .emit(&[], sink, emit_info, state); + AtomicOP::merge( + writable_spilltmp_reg2(), + writable_spilltmp_reg(), + offset, + t0.to_reg(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + spilltmp_reg2() + } else { + t0.to_reg() + } + } + + crate::ir::AtomicRmwOp::Umin + | crate::ir::AtomicRmwOp::Umax + | crate::ir::AtomicRmwOp::Smin + | crate::ir::AtomicRmwOp::Smax => { + let label_select_done = sink.get_label(); + if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax + { + AtomicOP::extract(t0, offset, dst.to_reg(), ty) + } else { + AtomicOP::extract_sext(t0, offset, dst.to_reg(), ty) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::lower_br_icmp( + match op { + crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan, + crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan, + crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan, + crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan, + _ => unreachable!(), + }, + ValueRegs::one(t0.to_reg()), + ValueRegs::one(x), + BranchTarget::Label(label_select_done), + BranchTarget::zero(), + ty, + ) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // here we select x. 
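+                        // (the branch above already skipped this move when
+                        // the current value in t0 wins the comparison.)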
+                        Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state);
+                        sink.bind_label(label_select_done);
+                        Inst::Atomic {
+                            op: AtomicOP::load_op(ty),
+                            rd: writable_spilltmp_reg2(),
+                            addr: p,
+                            src: zero_reg(),
+                            amo: AMO::SeqCst,
+                        }
+                        .emit(&[], sink, emit_info, state);
+                        AtomicOP::merge(
+                            writable_spilltmp_reg2(),
+                            writable_spilltmp_reg(),
+                            offset,
+                            t0.to_reg(),
+                            ty,
+                        )
+                        .iter()
+                        .for_each(|i| i.emit(&[], sink, emit_info, state));
+                        spilltmp_reg2()
+                    }
+                    crate::ir::AtomicRmwOp::Xchg => {
+                        Inst::Atomic {
+                            op: AtomicOP::load_op(ty),
+                            rd: writable_spilltmp_reg2(),
+                            addr: p,
+                            src: zero_reg(),
+                            amo: AMO::SeqCst,
+                        }
+                        .emit(&[], sink, emit_info, state);
+                        AtomicOP::merge(
+                            writable_spilltmp_reg2(),
+                            writable_spilltmp_reg(),
+                            offset,
+                            x,
+                            ty,
+                        )
+                        .iter()
+                        .for_each(|i| i.emit(&[], sink, emit_info, state));
+                        spilltmp_reg2()
+                    }
+                };
+
+                Inst::Atomic {
+                    op: AtomicOP::store_op(ty),
+                    rd: t0,
+                    addr: p,
+                    src: store_value,
+                    amo: AMO::SeqCst,
+                }
+                .emit(&[], sink, emit_info, state);
+
+                // if the store-conditional failed, retry.
+                Inst::CondBr {
+                    taken: BranchTarget::Label(retry),
+                    not_taken: BranchTarget::zero(),
+                    kind: IntegerCompare {
+                        kind: IntCC::NotEqual,
+                        rs1: t0.to_reg(),
+                        rs2: zero_reg(),
+                    },
+                }
+                .emit(&[], sink, emit_info, state);
+            }
+
+            &Inst::IntSelect {
+                op,
+                ref dst,
+                ref x,
+                ref y,
+                ty,
+            } => {
+                let x = alloc_value_regs(x, &mut allocs);
+                let y = alloc_value_regs(y, &mut allocs);
+                let dst: Vec<_> = dst.iter().map(|r| allocs.next_writable(*r)).collect();
+                let label_true = sink.get_label();
+                let label_false = sink.get_label();
+                let label_done = sink.get_label();
+                Inst::lower_br_icmp(
+                    op.to_int_cc(),
+                    x,
+                    y,
+                    BranchTarget::Label(label_true),
+                    BranchTarget::Label(label_false),
+                    ty,
+                )
+                .into_iter()
+                .for_each(|i| i.emit(&[], sink, emit_info, state));
+
+                let gen_move = |dst: &Vec<Writable<Reg>>,
+                                val: &ValueRegs<Reg>,
+                                sink: &mut MachBuffer<Inst>,
+                                state: &mut EmitState| {
+                    let ty = if ty.bits() == 128 { I64 } else { ty };
+                    let mut insts = SmallInstVec::new();
+                    insts.push(Inst::Mov {
+                        rd: dst[0],
+                        rm: val.regs()[0],
+                        ty,
+                    });
+                    if ty.bits() == 128 {
+                        insts.push(Inst::Mov {
+                            rd: dst[1],
+                            rm: val.regs()[1],
+                            ty,
+                        });
+                    }
+                    insts
+                        .into_iter()
+                        .for_each(|i| i.emit(&[], sink, emit_info, state));
+                };
+                // here is true: use x.
+ sink.bind_label(label_true); + gen_move(&dst, &x, sink, state); + Inst::gen_jump(label_done).emit(&[], sink, emit_info, state); + // here is false use y + sink.bind_label(label_false); + gen_move(&dst, &y, sink, state); + sink.bind_label(label_done); + } + &Inst::Csr { + csr_op, + rd, + rs, + imm, + csr, + } => { + let rs = rs.map(|r| allocs.next(r)); + let rd = allocs.next_writable(rd); + let x = csr_op.op_code() + | reg_to_gpr_num(rd.to_reg()) << 7 + | csr_op.funct3() << 12 + | csr_op.rs1(rs, imm) << 15 + | csr.as_u32() << 20; + + sink.put4(x); + } + + &Inst::SelectReg { + condition, + rd, + rs1, + rs2, + } => { + let mut condition = condition.clone(); + condition.rs1 = allocs.next(condition.rs1); + condition.rs2 = allocs.next(condition.rs2); + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let rd = allocs.next_writable(rd); + let label_true = sink.get_label(); + let label_jump_over = sink.get_label(); + sink.use_label_at_offset(sink.cur_offset(), label_true, LabelUse::B12); + let x = condition.emit(); + sink.put4(x); + // here is false , use rs2 + Inst::gen_move(rd, rs2, I64).emit(&[], sink, emit_info, state); + // and jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here condition is true , use rs1 + sink.bind_label(label_true); + Inst::gen_move(rd, rs1, I64).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over); + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // get if nan. + Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state); + // jump to nan. + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs2: zero_reg(), + rs1: rd.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + if !is_sat { + let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8); + if in_type == F32 { + Inst::load_fp_constant32( + tmp, + f32_bits(f32_bounds.0), + writable_spilltmp_reg(), + ) + } else { + Inst::load_fp_constant64( + tmp, + f64_bits(f64_bounds.0), + writable_spilltmp_reg(), + ) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::TrapFf { + cc: FloatCC::LessThanOrEqual, + x: rs, + y: tmp.to_reg(), + ty: in_type, + tmp: rd, + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + if in_type == F32 { + Inst::load_fp_constant32( + tmp, + f32_bits(f32_bounds.1), + writable_spilltmp_reg(), + ) + } else { + Inst::load_fp_constant64( + tmp, + f64_bits(f64_bounds.1), + writable_spilltmp_reg(), + ) + } + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::TrapFf { + cc: FloatCC::GreaterThanOrEqual, + x: rs, + y: tmp.to_reg(), + ty: in_type, + tmp: rd, + trap_code: TrapCode::IntegerOverflow, + } + .emit(&[], sink, emit_info, state); + } + // convert to int normally. + Inst::FpuRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type), + rd, + rs, + } + .emit(&[], sink, emit_info, state); + // I already have the result,jump over. 
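+                // below, only NaN needs fixing up in the saturating case:
+                // RISC-V `fcvt` already saturates out-of-range inputs, but
+                // NaN must become 0 to match Cranelift's semantics.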
+                Inst::Jal {
+                    dest: BranchTarget::Label(label_jump_over),
+                }
+                .emit(&[], sink, emit_info, state);
+                // here the input is NaN.
+                sink.bind_label(label_nan);
+                if is_sat {
+                    // saturating conversion of NaN yields 0.
+                    Inst::load_imm12(rd, Imm12::from_bits(0)).emit(&[], sink, emit_info, state);
+                } else {
+                    // non-saturating conversion of NaN traps.
+                    Inst::Udf {
+                        trap_code: TrapCode::BadConversionToInteger,
+                    }
+                    .emit(&[], sink, emit_info, state);
+                }
+                // bind jump_over.
+                sink.bind_label(label_jump_over);
+            }
+
+            &Inst::LoadExtName {
+                rd,
+                ref name,
+                offset,
+            } => {
+                let rd = allocs.next_writable(rd);
+                // get the current pc.
+                Inst::Auipc {
+                    rd,
+                    imm: Imm20::from_bits(0),
+                }
+                .emit(&[], sink, emit_info, state);
+                // load the value.
+                Inst::Load {
+                    rd,
+                    op: LoadOP::Ld,
+                    flags: MemFlags::trusted(),
+                    from: AMode::RegOffset(
+                        rd.to_reg(),
+                        12, // skip the auipc, ld and jal (3 x 4 bytes) to reach the constant.
+                        I64,
+                    ),
+                }
+                .emit(&[], sink, emit_info, state);
+                // jump over.
+                Inst::Jal {
+                    // jal (4 bytes) plus the abs8 constant (8 bytes) gives 12.
+                    dest: BranchTarget::offset(12),
+                }
+                .emit(&[], sink, emit_info, state);
+
+                sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
+                sink.put8(0);
+            }
+            &Inst::TrapIfC {
+                rs1,
+                rs2,
+                cc,
+                trap_code,
+            } => {
+                let rs1 = allocs.next(rs1);
+                let rs2 = allocs.next(rs2);
+                let label_trap = sink.get_label();
+                let label_jump_over = sink.get_label();
+                Inst::CondBr {
+                    taken: BranchTarget::Label(label_trap),
+                    not_taken: BranchTarget::Label(label_jump_over),
+                    kind: IntegerCompare { kind: cc, rs1, rs2 },
+                }
+                .emit(&[], sink, emit_info, state);
+                // trap.
+                sink.bind_label(label_trap);
+                Inst::Udf { trap_code }.emit(&[], sink, emit_info, state);
+                sink.bind_label(label_jump_over);
+            }
+            &Inst::TrapIf { test, trap_code } => {
+                let test = allocs.next(test);
+                let label_trap = sink.get_label();
+                let label_jump_over = sink.get_label();
+                Inst::CondBr {
+                    taken: BranchTarget::Label(label_trap),
+                    not_taken: BranchTarget::Label(label_jump_over),
+                    kind: IntegerCompare {
+                        kind: IntCC::NotEqual,
+                        rs1: test,
+                        rs2: zero_reg(),
+                    },
+                }
+                .emit(&[], sink, emit_info, state);
+                // trap.
+                sink.bind_label(label_trap);
+                Inst::Udf { trap_code }.emit(&[], sink, emit_info, state);
+                sink.bind_label(label_jump_over);
+            }
+            &Inst::TrapFf {
+                cc,
+                x,
+                y,
+                ty,
+                trap_code,
+                tmp,
+            } => {
+                let x = allocs.next(x);
+                let y = allocs.next(y);
+                let tmp = allocs.next_writable(tmp);
+                let label_trap = sink.get_label();
+                let label_jump_over = sink.get_label();
+                Inst::lower_br_fcmp(
+                    cc,
+                    x,
+                    y,
+                    BranchTarget::Label(label_trap),
+                    BranchTarget::Label(label_jump_over),
+                    ty,
+                    tmp,
+                )
+                .iter()
+                .for_each(|i| i.emit(&[], sink, emit_info, state));
+                // trap.
+                sink.bind_label(label_trap);
+                Inst::Udf { trap_code }.emit(&[], sink, emit_info, state);
+                sink.bind_label(label_jump_over);
+            }
+
+            &Inst::Udf { trap_code } => {
+                sink.add_trap(trap_code);
+                if let Some(s) = state.take_stack_map() {
+                    sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+                }
+                // https://github.com/riscv/riscv-isa-manual/issues/850
+                // an all-zero word is guaranteed to decode as an invalid opcode.
+                sink.put4(0);
+            }
+            &Inst::SelectIf {
+                // `_if_spectre_guard` is not used during emission; it only
+                // marks this select as a bounds-check guard so optimization
+                // passes do not remove or simplify it.
+                if_spectre_guard: _if_spectre_guard,
+ ref rd, + test, + ref x, + ref y, + } => { + let label_select_x = sink.get_label(); + let label_select_y = sink.get_label(); + let label_jump_over = sink.get_label(); + let test = allocs.next(test); + let x = alloc_value_regs(x, &mut allocs); + let y = alloc_value_regs(y, &mut allocs); + let rd: Vec<_> = rd.iter().map(|r| allocs.next_writable(*r)).collect(); + Inst::CondBr { + taken: BranchTarget::Label(label_select_x), + not_taken: BranchTarget::Label(label_select_y), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: test, + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + + // here select x. + sink.bind_label(label_select_x); + gen_moves(&rd[..], x.regs()) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // jump over + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select y. + sink.bind_label(label_select_y); + gen_moves(&rd[..], y.regs()) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + sink.bind_label(label_jump_over); + } + &Inst::AtomicLoad { rd, ty, p } => { + let p = allocs.next(p); + let rd = allocs.next_writable(rd); + // emit the fence. + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + // load. + Inst::Load { + rd: rd, + op: LoadOP::from_type(ty), + flags: MemFlags::new(), + from: AMode::RegOffset(p, 0, ty), + } + .emit(&[], sink, emit_info, state); + Inst::Fence { + pred: Inst::FENCE_REQ_R, + succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + } + &Inst::AtomicStore { src, ty, p } => { + let src = allocs.next(src); + let p = allocs.next(p); + Inst::Fence { + pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W, + succ: Inst::FENCE_REQ_W, + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(p, 0, ty), + op: StoreOP::from_type(ty), + flags: MemFlags::new(), + src, + } + .emit(&[], sink, emit_info, state); + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + // this code is port from glibc ceil floor ... implementation. + let rs = allocs.next(rs); + let int_tmp = allocs.next_writable(int_tmp); + let f_tmp = allocs.next_writable(f_tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_x = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if is nan. + Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: int_tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + fn max_value_need_round(ty: Type) -> u64 { + match ty { + F32 => { + let x: u64 = 1 << f32::MANTISSA_DIGITS; + let x = x as f32; + let x = u32::from_le_bytes(x.to_le_bytes()); + x as u64 + } + F64 => { + let x: u64 = 1 << f64::MANTISSA_DIGITS; + let x = x as f64; + u64::from_le_bytes(x.to_le_bytes()) + } + _ => unreachable!(), + } + } + // load max value need to round. + if ty == F32 { + Inst::load_fp_constant32( + f_tmp, + max_value_need_round(ty) as u32, + writable_spilltmp_reg(), + ) + } else { + Inst::load_fp_constant64( + f_tmp, + max_value_need_round(ty), + writable_spilltmp_reg(), + ) + } + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + // get abs value. 
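+                // values with |x| >= 2^MANTISSA_DIGITS have no fractional
+                // bits, so they are already integral and are returned as-is
+                // (via label_x below); smaller magnitudes round-trip exactly
+                // through the i64 conversion that follows.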
+ Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state); + Inst::lower_br_fcmp( + FloatCC::GreaterThan, + // abs value > max_value_need_round + rd.to_reg(), + f_tmp.to_reg(), + BranchTarget::Label(label_x), + BranchTarget::zero(), + ty, + int_tmp, + ) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + //convert to int. + Inst::FpuRR { + alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64), + frm: Some(op.to_frm()), + rd: int_tmp, + rs: rs, + } + .emit(&[], sink, emit_info, state); + //convert back. + Inst::FpuRR { + alu_op: FpuOPRR::int_convert_2_float_op(I64, true, ty), + frm: Some(op.to_frm()), + rd, + rs: int_tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // copy sign. + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FsgnjS + } else { + FpuOPRRR::FsgnjD + }, + frm: None, + rd, + rs1: rd.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + // jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan); + Inst::FpuRRR { + alu_op: if ty == F32 { + FpuOPRRR::FaddS + } else { + FpuOPRRR::FaddD + }, + frm: None, + rd: rd, + rs1: rs, + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here select origin x. + sink.bind_label(label_x); + Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over); + } + &Inst::FloatSelectPseudo { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_rs2 = sink.get_label(); + let label_jump_over = sink.get_label(); + let lt_op = if ty == F32 { + FpuOPRRR::FltS + } else { + FpuOPRRR::FltD + }; + Inst::FpuRRR { + alu_op: lt_op, + frm: None, + rd: tmp, + rs1: if op == FloatSelectOP::Max { rs1 } else { rs2 }, + rs2: if op == FloatSelectOP::Max { rs2 } else { rs1 }, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_rs2), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here select rs1 as result. + Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_rs2); + Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over); + } + + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = allocs.next(rs1); + let rs2 = allocs.next(rs2); + let tmp = allocs.next_writable(tmp); + let rd = allocs.next_writable(rd); + let label_nan = sink.get_label(); + let label_jump_over = sink.get_label(); + // check if rs1 is nan. + Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // check if rs2 is nan. 
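+                // (Cranelift's fmin/fmax must return NaN if either input is
+                // NaN, whereas the RISC-V fmin/fmax instructions would return
+                // the non-NaN operand, hence the explicit checks on both.)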
+ Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_nan), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: tmp.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // here rs1 and rs2 is not nan. + Inst::FpuRRR { + alu_op: op.to_fpuoprrr(ty), + frm: None, + rd: rd, + rs1: rs1, + rs2: rs2, + } + .emit(&[], sink, emit_info, state); + // special handle for +0 or -0. + { + // check is rs1 and rs2 all equal to zero. + let label_done = sink.get_label(); + { + // if rs1 == 0 + let mut insts = Inst::emit_if_float_not_zero( + tmp, + rs1, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + ); + insts.extend(Inst::emit_if_float_not_zero( + tmp, + rs2, + ty, + BranchTarget::Label(label_done), + BranchTarget::zero(), + )); + insts + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + } + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: tmp, + rs: rs1, + } + .emit(&[], sink, emit_info, state); + Inst::FpuRR { + alu_op: FpuOPRR::move_f_to_x_op(ty), + frm: None, + rd: writable_spilltmp_reg(), + rs: rs2, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: if op == FloatSelectOP::Max { + AluOPRRR::And + } else { + AluOPRRR::Or + }, + rd: tmp, + rs1: tmp.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + // move back to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + // + sink.bind_label(label_done); + } + // we have the reuslt,jump over. + Inst::Jal { + dest: BranchTarget::Label(label_jump_over), + } + .emit(&[], sink, emit_info, state); + // here is nan. + sink.bind_label(label_nan); + op.snan_bits(tmp, ty) + .into_iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + // move to rd. + Inst::FpuRR { + alu_op: FpuOPRR::move_x_to_f_op(ty), + frm: None, + rd, + rs: tmp.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_jump_over); + } + &Inst::Popcnt { + sum, + tmp, + step, + rs, + ty, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. + Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. 
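+                // loop body below: if (rs & tmp) != 0, increment sum; tmp is
+                // a single-bit mask that starts at the type's MSB and moves
+                // right one bit per iteration while step counts down to 0.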
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let rd = allocs.next_writable(rd); + // init. + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state); + // load 56 to step. + Inst::load_imm12(step, Imm12::from_bits(56)).emit(&[], sink, emit_info, state); + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThan, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_spilltmp_reg(), + rs: tmp.to_reg(), + imm12: Imm12::from_bits(255), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_spilltmp_reg(), + rs1: spilltmp_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: spilltmp_reg(), + } + .emit(&[], sink, emit_info, state); + { + // reset step + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-8), + } + .emit(&[], sink, emit_info, state); + //reset tmp. + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(8), + } + .emit(&[], sink, emit_info, state); + // loop. + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_done); + } + &Inst::Cltz { + sum, + tmp, + step, + rs, + leading, + ty, + } => { + let rs = allocs.next(rs); + let tmp = allocs.next_writable(tmp); + let step = allocs.next_writable(step); + let sum = allocs.next_writable(sum); + // load 0 to sum , init. 
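+                // same mask-walk as Popcnt above, but the loop exits at the
+                // first set bit: `leading` scans from the MSB down (clz/cls),
+                // otherwise from the LSB up (ctz).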
+ Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state); + // load + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + if leading { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + } + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and add sum. + { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: sum, + rs: sum.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: if leading { + AluOPRRI::Srli + } else { + AluOPRRI::Slli + }, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done); + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = allocs.next(rs); + let step = allocs.next_writable(step); + let tmp = allocs.next_writable(tmp); + let tmp2 = allocs.next_writable(tmp2); + let rd = allocs.next_writable(rd); + Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state); + Inst::load_imm12(step, Imm12::from_bits(ty.bits() as i16)).emit( + &[], + sink, + emit_info, + state, + ); + // + Inst::load_imm12(tmp, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 1) as i16), + } + .emit(&[], sink, emit_info, state); + Inst::load_imm12(tmp2, Imm12::from_bits(1)).emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits((ty.bits() - 8) as i16), + } + .emit(&[], sink, emit_info, state); + + let label_done = sink.get_label(); + let label_loop = sink.get_label(); + sink.bind_label(label_loop); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::SignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // test and set bit. 
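+                // brev8 reverses the bit order within each byte: tmp walks
+                // the source bits MSB->LSB while tmp2 walks the destination
+                // bits LSB->MSB inside the current byte, dropping down 15
+                // positions at each byte boundary (see the tmp2 reset below).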
+ { + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_spilltmp_reg2(), + rs1: tmp.to_reg(), + rs2: rs, + } + .emit(&[], sink, emit_info, state); + let label_over = sink.get_label(); + Inst::CondBr { + taken: BranchTarget::Label(label_over), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::Equal, + rs1: zero_reg(), + rs2: spilltmp_reg2(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: rd, + rs1: rd.to_reg(), + rs2: tmp2.to_reg(), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over); + } + // set step and tmp. + { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: step, + rs: step.to_reg(), + imm12: Imm12::from_bits(-1), + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp, + rs: tmp.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + { + // reset tmp2 + // if (step %=8 == 0) then tmp2 = tmp2 >> 15 + // if (step %=8 != 0) then tmp2 = tmp2 << 1 + let label_over = sink.get_label(); + let label_sll_1 = sink.get_label(); + Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_bits(8)).emit( + &[], + sink, + emit_info, + state, + ); + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_spilltmp_reg2(), + rs1: step.to_reg(), + rs2: spilltmp_reg2(), + } + .emit(&[], sink, emit_info, state); + Inst::CondBr { + taken: BranchTarget::Label(label_sll_1), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::NotEqual, + rs1: spilltmp_reg2(), + rs2: zero_reg(), + }, + } + .emit(&[], sink, emit_info, state); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(15), + } + .emit(&[], sink, emit_info, state); + Inst::Jal { + dest: BranchTarget::Label(label_over), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_sll_1); + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: tmp2, + rs: tmp2.to_reg(), + imm12: Imm12::from_bits(1), + } + .emit(&[], sink, emit_info, state); + sink.bind_label(label_over); + } + Inst::Jal { + dest: BranchTarget::Label(label_loop), + } + .emit(&[], sink, emit_info, state); + } + sink.bind_label(label_done); + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp: guard_size_tmp, + } => { + let step = writable_spilltmp_reg(); + Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64)) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + Inst::load_constant_u64(guard_size_tmp, guard_size as u64) + .iter() + .for_each(|i| i.emit(&[], sink, emit_info, state)); + + let loop_start = sink.get_label(); + let label_done = sink.get_label(); + sink.bind_label(loop_start); + Inst::CondBr { + taken: BranchTarget::Label(label_done), + not_taken: BranchTarget::zero(), + kind: IntegerCompare { + kind: IntCC::UnsignedLessThanOrEqual, + rs1: step.to_reg(), + rs2: guard_size_tmp.to_reg(), + }, + } + .emit(&[], sink, emit_info, state); + // compute address. + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_spilltmp_reg2(), + rs1: stack_reg(), + rs2: step.to_reg(), + } + .emit(&[], sink, emit_info, state); + Inst::Store { + to: AMode::RegOffset(spilltmp_reg2(), 0, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: zero_reg(), + } + .emit(&[], sink, emit_info, state); + // reset step. 
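+                // step -= guard_size, then loop; overall this touches one
+                // byte every guard_size bytes below sp, starting at the
+                // farthest offset and walking back toward the stack pointer.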
+                Inst::AluRRR {
+                    alu_op: AluOPRRR::Sub,
+                    rd: step,
+                    rs1: step.to_reg(),
+                    rs2: guard_size_tmp.to_reg(),
+                }
+                .emit(&[], sink, emit_info, state);
+                Inst::Jal {
+                    dest: BranchTarget::Label(loop_start),
+                }
+                .emit(&[], sink, emit_info, state);
+                sink.bind_label(label_done);
+            }
+        };
+        let end_off = sink.cur_offset();
+        assert!(
+            (end_off - start_off) <= Inst::worst_case_size(),
+            "Inst:{:?} length:{} worst_case_size:{}",
+            self,
+            end_off - start_off,
+            Inst::worst_case_size()
+        );
+    }
+
+    fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String {
+        let mut allocs = AllocationConsumer::new(allocs);
+        self.print_with_state(state, &mut allocs)
+    }
+}
+
+// helper function.
+fn alloc_value_regs(origin: &ValueRegs<Reg>, alloc: &mut AllocationConsumer) -> ValueRegs<Reg> {
+    match origin.regs().len() {
+        1 => ValueRegs::one(alloc.next(origin.regs()[0])),
+        2 => ValueRegs::two(alloc.next(origin.regs()[0]), alloc.next(origin.regs()[1])),
+        _ => unreachable!(),
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
new file mode 100644
index 000000000000..8992cf694af2
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
@@ -0,0 +1,2279 @@
+#[allow(unused)]
+use crate::ir::LibCall;
+use crate::isa::riscv64::inst::*;
+use crate::settings;
+use alloc::vec::Vec;
+
+#[test]
+fn test_riscv64_binemit() {
+    struct TestUnit {
+        inst: Inst,
+        assembly: &'static str,
+        code: u32,
+    }
+
+    impl TestUnit {
+        fn new(i: Inst, ass: &'static str, code: u32) -> Self {
+            Self {
+                inst: i,
+                assembly: ass,
+                code,
+            }
+        }
+    }
+
+    let mut insns = Vec::<TestUnit>::with_capacity(500);
+
+    insns.push(TestUnit::new(
+        Inst::Mov {
+            rd: writable_fa0(),
+            rm: fa1(),
+            ty: F32,
+        },
+        "fmv.s fa0,fa1",
+        0x20b58553,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::Mov {
+            rd: writable_fa0(),
+            rm: fa1(),
+            ty: F64,
+        },
+        "fmv.d fa0,fa1",
+        0x22b58553,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Brev8,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::zero(),
+        },
+        "brev8 a1,a0",
+        0x68755593,
+    ));
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Rev8,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::zero(),
+        },
+        "rev8 a1,a0",
+        0x6b855593,
+    ));
+
+    //
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Bclri,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "bclri a1,a0,5",
+        0x48551593,
+    ));
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Bexti,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "bexti a1,a0,5",
+        0x48555593,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Binvi,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "binvi a1,a0,5",
+        0x68551593,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Bseti,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "bseti a1,a0,5",
+        0x28551593,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Rori,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "rori a1,a0,5",
+        0x60555593,
+    ));
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::Roriw,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+        },
+        "roriw a1,a0,5",
+        0x6055559b,
+    ));
+
+    insns.push(TestUnit::new(
+        Inst::AluRRImm12 {
+            alu_op: AluOPRRI::SlliUw,
+            rd: writable_a1(),
+            rs: a0(),
+            imm12: Imm12::from_bits(5),
+ }, + "slli.uw a1,a0,5", + 0x855159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clz a1,a0", + 0x60051593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Clzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "clzw a1,a0", + 0x6005159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpop, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpop a1,a0", + 0x60251593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Cpopw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "cpopw a1,a0", + 0x6025159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctz, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctz a1,a0", + 0x60151593, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Ctzw, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "ctzw a1,a0", + 0x6015159b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sextb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.b a1,a0", + 0x60451593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "sext.h a1,a0", + 0x60551593, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Zexth, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "zext.h a1,a0", + 0x80545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Orcb, + rd: writable_a1(), + rs: a0(), + imm12: Imm12::zero(), + }, + "orc.b a1,a0", + 0x28755593, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "add.uw a1,a0,zero", + 0x80505bb, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Andn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "andn a1,a0,zero", + 0x400575b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bclr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bclr a1,a0,zero", + 0x480515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bext, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bext a1,a0,zero", + 0x480555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Binv, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "binv a1,a0,zero", + 0x680515b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Bset, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "bset a1,a0,zero", + 0x280515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmul, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmul a1,a0,zero", + 0xa0515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulh, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulh a1,a0,zero", + 0xa0535b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Clmulr, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "clmulr a1,a0,zero", + 0xa0525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Max, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "max a1,a0,zero", + 0xa0565b3, + )); + + insns.push(TestUnit::new( + 
Inst::AluRRR { + alu_op: AluOPRRR::Maxu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "maxu a1,a0,zero", + 0xa0575b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Min, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "min a1,a0,zero", + 0xa0545b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Minu, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "minu a1,a0,zero", + 0xa0555b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Orn, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "orn a1,a0,zero", + 0x400565b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rol, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rol a1,a0,zero", + 0x600515b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rolw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rolw a1,a0,zero", + 0x600515bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Ror, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "ror a1,a0,zero", + 0x600555b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rorw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "rorw a1,a0,zero", + 0x600555bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add a1,a0,zero", + 0x200525b3, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh1adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh1add.uw a1,a0,zero", + 0x200525bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add a1,a0,zero", + 0x200545b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh2adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh2add.uw a1,a0,zero", + 0x200545bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3add, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add a1,a0,zero", + 0x200565b3, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sh3adduw, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "sh3add.uw a1,a0,zero", + 0x200565bb, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xnor, + rd: writable_a1(), + rs1: a0(), + rs2: zero_reg(), + }, + "xnor a1,a0,zero", + 0x400545b3, + )); + + // + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_fp_reg(), + rs1: fp_reg(), + rs2: zero_reg(), + }, + "add fp,fp,zero", + 0x40433, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_fp_reg(), + rs: stack_reg(), + imm12: Imm12::maybe_from_u64(100).unwrap(), + }, + "addi fp,sp,100", + 0x6410413, + )); + insns.push(TestUnit::new( + Inst::Lui { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "lui zero,120", + 0x78037, + )); + insns.push(TestUnit::new( + Inst::Auipc { + rd: writable_zero_reg(), + imm: Imm20::from_bits(120), + }, + "auipc zero,120", + 0x78017, + )); + + insns.push(TestUnit::new( + Inst::Jalr { + rd: writable_a0(), + base: a0(), + offset: Imm12::from_bits(100), + }, + "jalr a0,100(a0)", + 0x6450567, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lb, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I8), + }, + "lb 
a0,100(a1)", + 0x6458503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lbu, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, B8), + }, + "lbu a0,100(a1)", + 0x645c503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lh, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I16), + }, + "lh a0,100(a1)", + 0x6459503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lhu, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, B16), + }, + "lhu a0,100(a1)", + 0x645d503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I32), + }, + "lw a0,100(a1)", + 0x645a503, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Lwu, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, B32), + }, + "lwu a0,100(a1)", + 0x645e503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: writable_a0(), + op: LoadOP::Ld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "ld a0,100(a1)", + 0x645b503, + )); + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Flw, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "flw fa0,100(a1)", + 0x645a507, + )); + + insns.push(TestUnit::new( + Inst::Load { + rd: Writable::from_reg(fa0()), + op: LoadOP::Fld, + flags: MemFlags::new(), + from: AMode::RegOffset(a1(), 100, I64), + }, + "fld fa0,100(a1)", + 0x645b507, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I8), + op: StoreOP::Sb, + flags: MemFlags::new(), + src: a0(), + }, + "sb a0,100(sp)", + 0x6a10223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I16), + op: StoreOP::Sh, + flags: MemFlags::new(), + src: a0(), + }, + "sh a0,100(sp)", + 0x6a11223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I32), + op: StoreOP::Sw, + flags: MemFlags::new(), + src: a0(), + }, + "sw a0,100(sp)", + 0x6a12223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Sd, + flags: MemFlags::new(), + src: a0(), + }, + "sd a0,100(sp)", + 0x6a13223, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsw, + flags: MemFlags::new(), + src: fa0(), + }, + "fsw fa0,100(sp)", + 0x6a12227, + )); + insns.push(TestUnit::new( + Inst::Store { + to: AMode::SPOffset(100, I64), + op: StoreOP::Fsd, + flags: MemFlags::new(), + src: fa0(), + }, + "fsd fa0,100(sp)", + 0x6a13227, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "addi a0,a0,100", + 0x6450513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slti, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "slti a0,a0,100", + 0x6452513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SltiU, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "sltiu a0,a0,100", + 0x6453513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Xori, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(100), + }, + "xori a0,a0,100", + 0x6454513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Andi, + rd: writable_a0(), + rs: a0(), + imm12: 
Imm12::from_bits(100), + }, + "andi a0,a0,100", + 0x6457513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slli a0,a0,5", + 0x551513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srli, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srli a0,a0,5", + 0x555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Srai, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srai a0,a0,5", + 0x40555513, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(120), + }, + "addiw a0,a0,120", + 0x785051b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Slliw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "slliw a0,a0,5", + 0x55151b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::SrliW, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "srliw a0,a0,5", + 0x55551b, + )); + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + + insns.push(TestUnit::new( + Inst::AluRRImm12 { + alu_op: AluOPRRI::Sraiw, + rd: writable_a0(), + rs: a0(), + imm12: Imm12::from_bits(5), + }, + "sraiw a0,a0,5", + 0x4055551b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "add a0,a0,a1", + 0xb50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sub, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sub a0,a0,a1", + 0x40b50533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sll, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sll a0,a0,a1", + 0xb51533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Slt, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "slt a0,a0,a1", + 0xb52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::SltU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sltu a0,a0,a1", + 0xb53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Xor, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "xor a0,a0,a1", + 0xb54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srl, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srl a0,a0,a1", + 0xb55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sra, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sra a0,a0,a1", + 0x40b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Or, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "or a0,a0,a1", + 0xb56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::And, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "and a0,a0,a1", + 0xb57533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Addw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "addw a0,a0,a1", + 0xb5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Subw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "subw a0,a0,a1", + 0x40b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sllw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + 
"sllw a0,a0,a1", + 0xb5153b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Srlw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "srlw a0,a0,a1", + 0xb5553b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Sraw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "sraw a0,a0,a1", + 0x40b5553b, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mul, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mul a0,a0,a1", + 0x2b50533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulh, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulh a0,a0,a1", + 0x2b51533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhsu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhsu a0,a0,a1", + 0x2b52533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulhu, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulhu a0,a0,a1", + 0x2b53533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Div, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "div a0,a0,a1", + 0x2b54533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::DivU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divu a0,a0,a1", + 0x2b55533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Rem, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "rem a0,a0,a1", + 0x2b56533, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::RemU, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remu a0,a0,a1", + 0x2b57533, + )); + + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Mulw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "mulw a0,a0,a1", + 0x2b5053b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Divw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "divw a0,a0,a1", + 0x2b5453b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remw a0,a0,a1", + 0x2b5653b, + )); + insns.push(TestUnit::new( + Inst::AluRRR { + alu_op: AluOPRRR::Remuw, + rd: writable_a0(), + rs1: a0(), + rs2: a1(), + }, + "remuw a0,a0,a1", + 0x2b5753b, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRR::FaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.s fa0,fa0,fa1,rne", + 0xb50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RTZ), + alu_op: FpuOPRRR::FsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.s fa0,fa0,fa1,rtz", + 0x8b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: Some(FRM::RUP), + alu_op: FpuOPRRR::FmulS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.s fa0,fa0,fa1,rup", + 0x10b53553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.s fa0,fa0,fa1", + 0x18b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.s fa0,fa0,fa1", + 0x20b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.s fa0,fa0,fa1", + 0x20b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxS, + rd: 
writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.s fa0,fa0,fa1", + 0x20b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.s fa0,fa0,fa1", + 0x28b50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.s fa0,fa0,fa1", + 0x28b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.s a0,fa0,fa1", + 0xa0b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.s a0,fa0,fa1", + 0xa0b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleS, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.s a0,fa0,fa1", + 0xa0b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fadd.d fa0,fa0,fa1", + 0x2b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsub.d fa0,fa0,fa1", + 0xab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmulD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmul.d fa0,fa0,fa1", + 0x12b57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FdivD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fdiv.d fa0,fa0,fa1", + 0x1ab57553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnj.d fa0,fa0,fa1", + 0x22b50553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjnD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjn.d fa0,fa0,fa1", + 0x22b51553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FsgnjxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fsgnjx.d fa0,fa0,fa1", + 0x22b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FminD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmin.d fa0,fa0,fa1", + 0x2ab50553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FmaxD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + }, + "fmax.d fa0,fa0,fa1", + 0x2ab51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FeqD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "feq.d a0,fa0,fa1", + 0xa2b52553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FltD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "flt.d a0,fa0,fa1", + 0xa2b51553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + frm: None, + alu_op: FpuOPRRR::FleD, + rd: writable_a0(), + rs1: fa0(), + rs2: fa1(), + }, + "fle.d a0,fa0,fa1", + 0xa2b50553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRR::FsqrtS, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.s fa0,fa1,rne", + 0x58058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.s a0,fa1", + 
0xc005f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuS, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.s a0,fa1", + 0xc015f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXW, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.w a0,fa1", + 0xe0058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassS, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.s a0,fa1", + 0xe0059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSw, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.w fa0,a0", + 0xd0057553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSwU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.wu fa0,a0", + 0xd0157553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvWX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.w.x fa0,a0", + 0xf0050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.s a0,fa0", + 0xc0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuS, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.s a0,fa0", + 0xc0357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + + alu_op: FpuOPRR::FcvtSL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.l fa0,a0", + 0xd0257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSLU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.s.lu fa0,a0", + 0xd0357553, + )); + + // + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FsqrtD, + rd: writable_fa0(), + rs: fa1(), + }, + "fsqrt.d fa0,fa1", + 0x5a05f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.w.d a0,fa1", + 0xc205f553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtWuD, + rd: writable_a0(), + rs: fa1(), + }, + "fcvt.wu.d a0,fa1", + 0xc215f553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvXD, + rd: writable_a0(), + rs: fa1(), + }, + "fmv.x.d a0,fa1", + 0xe2058553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FclassD, + rd: writable_a0(), + rs: fa1(), + }, + "fclass.d a0,fa1", + 0xe2059553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtSD, + rd: writable_fa0(), + rs: fa0(), + }, + "fcvt.s.d fa0,fa0", + 0x40157553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDWU, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.wu fa0,a0", + 0xd2150553, + )); + + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FmvDX, + rd: writable_fa0(), + rs: a0(), + }, + "fmv.d.x fa0,a0", + 0xf2050553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.l.d a0,fa0", + 0xc2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtLuD, + rd: writable_a0(), + rs: fa0(), + }, + "fcvt.lu.d a0,fa0", + 0xc2357553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::FcvtDL, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.l fa0,a0", + 0xd2257553, + )); + insns.push(TestUnit::new( + Inst::FpuRR { + frm: None, + 
alu_op: FpuOPRR::FcvtDLu, + rd: writable_fa0(), + rs: a0(), + }, + "fcvt.d.lu fa0,a0", + 0xd2357553, + )); + ////////////////////// + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: Some(FRM::RNE), + alu_op: FpuOPRRRR::FmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.s fa0,fa0,fa1,fa7,rne", + 0x88b50543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.s fa0,fa0,fa1,fa7", + 0x88b57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.s fa0,fa0,fa1,fa7", + 0x88b5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddS, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.s fa0,fa0,fa1,fa7", + 0x88b5754f, + )); + + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmadd.d fa0,fa0,fa1,fa7", + 0x8ab57543, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + + alu_op: FpuOPRRRR::FmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fmsub.d fa0,fa0,fa1,fa7", + 0x8ab57547, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmsubD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmsub.d fa0,fa0,fa1,fa7", + 0x8ab5754b, + )); + insns.push(TestUnit::new( + Inst::FpuRRRR { + frm: None, + alu_op: FpuOPRRRR::FnmaddD, + rd: writable_fa0(), + rs1: fa0(), + rs2: fa1(), + rs3: fa7(), + }, + "fnmadd.d fa0,fa0,fa1,fa7", + 0x8ab5754f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrW, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.w a0,(a1)", + 0x1005a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Release, + }, + "sc.w.rl a0,a2,(a1)", + 0x1ac5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Aquire, + }, + "amoswap.w.aq a0,a2,(a1)", + 0xcc5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::SeqCst, + }, + "amoadd.w.aqrl a0,a2,(a1)", + 0x6c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.w a0,a2,(a1)", + 0x20c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.w a0,a2,(a1)", + 0x60c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.w a0,a2,(a1)", + 0x40c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.w a0,a2,(a1)", + 0x80c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.w a0,a2,(a1)", + 0xa0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: 
AtomicOP::AmominuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.w a0,a2,(a1)", + 0xc0c5a52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuW, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.w a0,a2,(a1)", + 0xe0c5a52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::LrD, + rd: writable_a0(), + addr: a1(), + src: zero_reg(), + amo: AMO::Relax, + }, + "lr.d a0,(a1)", + 0x1005b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::ScD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "sc.d a0,a2,(a1)", + 0x18c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoswapD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoswap.d a0,a2,(a1)", + 0x8c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoaddD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoadd.d a0,a2,(a1)", + 0xc5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoxorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoxor.d a0,a2,(a1)", + 0x20c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoandD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoand.d a0,a2,(a1)", + 0x60c5b52f, + )); + + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmoorD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amoor.d a0,a2,(a1)", + 0x40c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomin.d a0,a2,(a1)", + 0x80c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomax.d a0,a2,(a1)", + 0xa0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmominuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amominu.d a0,a2,(a1)", + 0xc0c5b52f, + )); + insns.push(TestUnit::new( + Inst::Atomic { + op: AtomicOP::AmomaxuD, + rd: writable_a0(), + addr: a1(), + src: a2(), + amo: AMO::Relax, + }, + "amomaxu.d a0,a2,(a1)", + 0xe0c5b52f, + )); + + ///////// + insns.push(TestUnit::new( + Inst::Fence { + pred: 1, + succ: 1 << 1, + }, + "fence w,r", + 0x120000f, + )); + insns.push(TestUnit::new(Inst::FenceI {}, "fence.i", 0x100f)); + insns.push(TestUnit::new(Inst::ECall {}, "ecall", 0x73)); + insns.push(TestUnit::new(Inst::EBreak {}, "ebreak", 0x100073)); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.s fa0,fa1", + 0x20b58553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fmv.d fa0,fa1", + 0x22b58553, + )); + + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnS, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.s fa0,fa1", + 0x20b59553, + )); + insns.push(TestUnit::new( + Inst::FpuRRR { + alu_op: FpuOPRRR::FsgnjnD, + frm: None, + rd: writable_fa0(), + rs1: fa1(), + rs2: fa1(), + }, + "fneg.d fa0,fa1", + 0x22b59553, + )); + + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + for unit in insns.iter() { + 
println!("Riscv64: {:?}, {}", unit.inst, unit.assembly); + // Check the printed text is as expected. + let actual_printing = unit + .inst + .print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); + assert_eq!(unit.assembly, actual_printing); + let mut buffer = MachBuffer::new(); + unit.inst + .emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(); + if buffer.data() != unit.code.to_le_bytes() { + { + let gnu = DebugRTypeInst::from_bs(&unit.code.to_le_bytes()); + let my = DebugRTypeInst::from_bs(buffer.data()); + println!("gnu:{:?}", gnu); + println!("my :{:?}", my); + // println!("gnu:{:b}", gnu.funct7); + // println!("my :{:b}", my.funct7); + } + + { + let gnu = DebugITypeInst::from_bs(&unit.code.to_le_bytes()); + let my = DebugITypeInst::from_bs(buffer.data()); + println!("gnu:{:?}", gnu); + println!("my :{:?}", my); + println!("gnu:{:b}", gnu.op_code); + println!("my :{:b}", my.op_code); + } + assert_eq!(buffer.data(), unit.code.to_le_bytes()); + } + } +} + +fn make_test_flags() -> (settings::Flags, super::super::riscv_settings::Flags) { + let b = settings::builder(); + let flags = settings::Flags::new(b.clone()); + let b2 = super::super::riscv_settings::builder(); + let isa_flags = super::super::riscv_settings::Flags::new(&flags, b2); + (flags, isa_flags) +} + +#[derive(Debug)] +pub(crate) struct DebugRTypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs1: u32, + rs2: u32, + funct7: u32, +} + +impl DebugRTypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Self { + let a = [x[0], x[1], x[2], x[3]]; + Self::from_u32(u32::from_le_bytes(a)) + } + + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs1 = x & 0b1_1111; + let x = x >> 5; + let rs2 = x & 0b1_1111; + let x = x >> 5; + let funct7 = x & 0b111_1111; + Self { + op_code, + rd, + funct3, + rs1, + rs2, + funct7, + } + } +} + +#[derive(Debug)] +pub(crate) struct DebugITypeInst { + op_code: u32, + rd: u32, + funct3: u32, + rs: u32, + imm12: u32, + shamt5: u32, + shamt6: u32, + funct7: u32, + funct6: u32, +} + +impl DebugITypeInst { + pub(crate) fn from_bs(x: &[u8]) -> Self { + let a = [x[0], x[1], x[2], x[3]]; + Self::from_u32(u32::from_le_bytes(a)) + } + pub(crate) fn from_u32(x: u32) -> Self { + let op_code = x & 0b111_1111; + let x = x >> 7; + let rd = x & 0b1_1111; + let x = x >> 5; + let funct3 = x & 0b111; + let x = x >> 3; + let rs = x & 0b1_1111; + let x = x >> 5; + let imm12 = x & 0b1111_1111_1111; + let shamt5 = imm12 & 0b1_1111; + let shamt6 = imm12 & 0b11_1111; + let funct7 = imm12 >> 5; + let funct6 = funct7 >> 1; + Self { + op_code, + rd, + funct3, + rs, + imm12, + shamt5, + shamt6, + funct7, + funct6, + } + } + fn print_b(self) { + println!("opcode:{:b}", self.op_code); + println!("rd:{}", self.rd); + println!("funct3:{:b}", self.funct3); + println!("rs:{}", self.rs); + println!("shamt5:{:b}", self.shamt5); + println!("shamt6:{:b}", self.shamt6); + println!("funct6:{:b}", self.funct6); + println!("funct7:{:b}", self.funct7); + } +} + +#[test] +fn xxx() { + let x = 1240847763; + let x = DebugITypeInst::from_u32(x); + x.print_b(); +} + +#[test] +fn riscv64_worst_case_instruction_size() { + let (flags, isa_flags) = make_test_flags(); + let emit_info = EmitInfo::new(flags, isa_flags); + + //there are all candidates potential generate a lot of bytes. 
+ let mut candidates: Vec<Inst> = vec![]; + + candidates.push(Inst::Fcmp { + rd: writable_a0(), + cc: FloatCC::UnorderedOrLessThanOrEqual, + ty: F64, + rs1: fa1(), + rs2: fa0(), + }); + + candidates.push(Inst::IntSelect { + dst: vec![writable_a0(), writable_a0()], + ty: I128, + op: IntSelectOP::Imax, + x: ValueRegs::two(x_reg(1), x_reg(2)), + y: ValueRegs::two(x_reg(3), x_reg(4)), + }); + + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I64, + is_sat: true, + tmp: writable_a1(), + }); + + candidates.push(Inst::FcvtToInt { + rd: writable_a0(), + rs: fa0(), + is_signed: true, + in_type: F64, + out_type: I64, + is_sat: false, + tmp: writable_a1(), + }); + + candidates.push(Inst::FloatRound { + op: FloatRoundOP::Trunc, + int_tmp: writable_a0(), + f_tmp: writable_a0(), + rd: writable_fa0(), + rs: fa0(), + ty: F64, + }); + + candidates.push(Inst::FloatSelect { + op: FloatSelectOP::Max, + rd: writable_fa0(), + tmp: writable_a0(), + rs1: fa0(), + rs2: fa0(), + ty: F64, + }); + + let mut max: (u32, MInst) = (0, Inst::Nop0); + for i in candidates { + let mut buffer = MachBuffer::new(); + i.emit(&[], &mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(); + let length = buffer.data().len() as u32; + if length > max.0 { + max = (length, i.clone()); + } + println!("insn:{:?} length: {}", i, length); + } + println!("calculated max size is {}, inst is {:?}", max.0, max.1); + assert!(max.0 <= Inst::worst_case_size()); +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs new file mode 100644 index 000000000000..d53315ade081 --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/inst/imms.rs @@ -0,0 +1,218 @@ +//! Riscv64 ISA definitions: immediate constants. + +// Some variants are never constructed, but we still want them as options in the future. +use super::Inst; +#[allow(dead_code)] +use std::fmt::{Debug, Display, Formatter, Result}; + +#[derive(Copy, Clone, Debug, Default)] +pub struct Imm12 { + pub bits: i16, +} + +impl Imm12 { + pub(crate) const FALSE: Self = Self { bits: 0 }; + pub(crate) const TRUE: Self = Self { bits: -1 }; + pub fn maybe_from_u64(val: u64) -> Option<Imm12> { + let sign_bit = 1 << 11; + if val == 0 { + Some(Imm12 { bits: 0 }) + } else if (val & sign_bit) != 0 && (val >> 12) == 0xffff_ffff_ffff_f { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else if (val & sign_bit) == 0 && (val >> 12) == 0 { + Some(Imm12 { + bits: (val & 0xffff) as i16, + }) + } else { + None + } + } + #[inline] + pub fn from_bits(bits: i16) -> Self { + Self { bits: bits & 0xfff } + } + /// Create a zero immediate of this format. + #[inline] + pub fn zero() -> Self { + Imm12 { bits: 0 } + } + #[inline] + pub fn as_i16(self) -> i16 { + self.bits + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xfff + } +} + +impl Into<i64> for Imm12 { + fn into(self) -> i64 { + self.bits as i64 + } +} + +impl Display for Imm12 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{:+}", self.bits) + } +} + +impl std::ops::Neg for Imm12 { + type Output = Self; + fn neg(self) -> Self::Output { + Self { bits: -self.bits } + } +} + +// signed +#[derive(Clone, Copy, Default)] +pub struct Imm20 { + /// The immediate bits.
+ pub bits: i32, +} + +impl Imm20 { + #[inline] + pub fn from_bits(bits: i32) -> Self { + Self { + bits: bits & 0xf_ffff, + } + } + #[inline] + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0xf_ffff + } +} + +impl Debug for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Display for Imm20 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +#[derive(Clone, Copy)] +pub struct Uimm5 { + bits: u8, +} + +impl Uimm5 { + pub fn from_bits(bits: u8) -> Self { + Self { bits } + } + /// Create a zero immediate of this format. + pub fn zero() -> Self { + Self { bits: 0 } + } + pub fn as_u32(&self) -> u32 { + (self.bits as u32) & 0b1_1111 + } +} + +impl Debug for Uimm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Display for Uimm5 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + write!(f, "{}", self.bits) + } +} + +impl Inst { + pub(crate) fn imm_min() -> i64 { + let imm20_max: i64 = (1 << 19) << 12; + let imm12_max = 1 << 11; + -imm20_max - imm12_max + } + pub(crate) fn imm_max() -> i64 { + let imm20_max: i64 = ((1 << 19) - 1) << 12; + let imm12_max = (1 << 11) - 1; + imm20_max + imm12_max + } + + /// An Imm20 immediate and an Imm12 immediate together can generate a 32-bit immediate. + /// This helper produces an imm12, an imm20, or both, as needed to generate the value. + /// + /// `value` must be between `imm_min()` and `imm_max()`, or else + /// this helper returns `None`. + pub(crate) fn generate_imm<R>( + value: u64, + mut handle_imm: impl FnMut(Option<Imm20>, Option<Imm12>) -> R, + ) -> Option<R> { + if let Some(imm12) = Imm12::maybe_from_u64(value) { + // The value can be loaded using a single imm12. + let r = handle_imm(None, Some(imm12)); + return Some(r); + } + let value = value as i64; + if !(value >= Self::imm_min() && value <= Self::imm_max()) { + // Not in range; return None. + return None; + } + const MOD_NUM: i64 = 4096; + let (imm20, imm12) = if value > 0 { + let mut imm20 = value / MOD_NUM; + let mut imm12 = value % MOD_NUM; + if imm12 >= 2048 { + imm12 -= MOD_NUM; + imm20 += 1; + } + assert!(imm12 >= -2048 && imm12 <= 2047); + (imm20, imm12) + } else { + // Work on the absolute value here. + let value_abs = value.abs(); + let imm20 = value_abs / MOD_NUM; + let imm12 = value_abs % MOD_NUM; + let mut imm20 = -imm20; + let mut imm12 = -imm12; + if imm12 < -2048 { + imm12 += MOD_NUM; + imm20 -= 1; + } + (imm20, imm12) + }; + assert!(imm20 >= -(0x7_ffff + 1) && imm20 <= 0x7_ffff); + assert!(imm20 != 0 || imm12 != 0); + Some(handle_imm( + if imm20 != 0 { + Some(Imm20::from_bits(imm20 as i32)) + } else { + None + }, + if imm12 != 0 { + Some(Imm12::from_bits(imm12 as i16)) + } else { + None + }, + )) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_imm12() { + let x = Imm12::zero(); + assert_eq!(0, x.as_u32()); + Imm12::maybe_from_u64(0xffff_ffff_ffff_ffff).unwrap(); + } + + #[test] + fn imm20_and_imm12() { + assert!(Inst::imm_max() == (i32::MAX - 2048) as i64); + assert!(Inst::imm_min() == i32::MIN as i64 - 2048); + } +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs new file mode 100644 index 000000000000..f212f45326fc --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -0,0 +1,1749 @@ +//! This module defines riscv64-specific machine instruction types. + +// Some variants are not constructed, but we still want them as options in the future.
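A note on the immediate-splitting scheme implemented by `generate_imm` above: a 32-bit constant is materialized as a `lui` (upper 20 bits) plus an `addi` (low 12 bits), and because `addi` sign-extends its immediate, a carry has to be folded into the `lui` part whenever the low 12 bits would read as negative. A minimal standalone sketch of that split (illustrative only, not code from this patch):

```rust
/// Split `value` into (imm20, imm12) such that (imm20 << 12) + imm12 == value,
/// with imm12 kept in the sign-extended range [-2048, 2047] that `addi` accepts.
fn split_imm(value: i64) -> (i64, i64) {
    let mut imm20 = value >> 12;
    let mut imm12 = value & 0xfff;
    if imm12 >= 2048 {
        imm12 -= 4096; // `addi` will subtract, so...
        imm20 += 1; // ...`lui` must overshoot by one page.
    }
    (imm20, imm12)
}

fn main() {
    let (hi, lo) = split_imm(0x12fff);
    assert_eq!((hi, lo), (0x13, -1)); // lui 0x13; addi -1
    assert_eq!((hi << 12) + lo, 0x12fff); // round-trips exactly
}
```

This is also where the `imm_max()`/`imm_min()` bounds asserted in the tests above come from: the reachable range is the `i32` range shifted down by the 2048 borrowed for sign extension.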
+#![allow(dead_code)] +#![allow(non_camel_case_types)] + +use crate::binemit::{Addend, CodeOffset, Reloc}; +pub use crate::ir::condcodes::IntCC; +use crate::ir::types::{ + B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, IFLAGS, R32, R64, +}; + +pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::isa::CallConv; +use crate::machinst::isle::WritableReg; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +pub use crate::ir::condcodes::FloatCC; + +use alloc::vec::Vec; +use regalloc2::{PRegSet, VReg}; +use smallvec::SmallVec; +use std::boxed::Box; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod unwind; + +use crate::isa::riscv64::abi::Riscv64MachineDeps; + +#[cfg(test)] +mod emit_tests; + +use std::fmt::{Display, Formatter}; + +pub(crate) type OptionReg = Option; +pub(crate) type OptionImm12 = Option; +pub(crate) type VecBranchTarget = Vec; +pub(crate) type OptionUimm5 = Option; +pub(crate) type OptionFloatRoundingMode = Option; +pub(crate) type VecU8 = Vec; +pub(crate) type VecWritableReg = Vec>; +//============================================================================= +// Instructions (top level): definition + +use crate::isa::riscv64::lower::isle::generated_code::MInst; +pub use crate::isa::riscv64::lower::isle::generated_code::{ + AluOPRRI, AluOPRRR, AtomicOP, CsrOP, FClassResult, FFlagsException, FenceFm, FloatRoundOP, + FloatSelectOP, FpuOPRR, FpuOPRRR, FpuOPRRRR, IntSelectOP, LoadOP, MInst as Inst, + ReferenceCheckOP, StoreOP, FRM, +}; + +type BoxCallInfo = Box; +type BoxCallIndInfo = Box; + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: CallArgList, + pub defs: CallRetList, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, + pub clobbers: PRegSet, +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub(crate) fn as_label(self) -> Option { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + /// offset zero. 
+ #[inline] + pub(crate) fn zero() -> Self { + Self::ResolvedOffset(0) + } + #[inline] + pub(crate) fn offset(off: i32) -> Self { + Self::ResolvedOffset(off) + } + #[inline] + pub(crate) fn is_zero(self) -> bool { + match self { + BranchTarget::Label(_) => false, + BranchTarget::ResolvedOffset(off) => off == 0, + } + } + #[inline] + pub(crate) fn as_offset(self) -> Option<i32> { + match self { + BranchTarget::Label(_) => None, + BranchTarget::ResolvedOffset(off) => Some(off), + } + } +} + +impl Display for BranchTarget { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + BranchTarget::Label(l) => write!(f, "{}", l.to_string()), + BranchTarget::ResolvedOffset(off) => write!(f, "{}", off), + } + } +} + +pub(crate) fn enc_auipc(rd: Writable<Reg>, imm: Imm20) -> u32 { + let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.as_u32() << 12; + x +} + +pub(crate) fn enc_jalr(rd: Writable<Reg>, base: Reg, offset: Imm12) -> u32 { + let x = 0b1100111 + | reg_to_gpr_num(rd.to_reg()) << 7 + | 0b000 << 12 + | reg_to_gpr_num(base) << 15 + | offset.as_u32() << 20; + x +} + +/// rd and src must have the same length. +pub(crate) fn gen_moves(rd: &[Writable<Reg>], src: &[Reg]) -> SmallInstVec<Inst> { + assert!(rd.len() == src.len()); + assert!(rd.len() > 0); + let mut insts = SmallInstVec::new(); + for (dst, src) in rd.iter().zip(src.iter()) { + let out_ty = Inst::canonical_type_for_rc(dst.to_reg().class()); + let in_ty = Inst::canonical_type_for_rc(src.class()); + insts.push(gen_move(*dst, out_ty, *src, in_ty)); + } + insts +} + +/// If the input or output is a floating-point value, a special instruction is +/// needed to generate the move and re-interpret the data. +pub(crate) fn gen_move(rd: Writable<Reg>, oty: Type, rm: Reg, ity: Type) -> Inst { + match (ity.is_float(), oty.is_float()) { + (false, false) => Inst::gen_move(rd, rm, oty), + (true, true) => Inst::gen_move(rd, rm, oty), + (false, true) => Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(oty), + rd: rd, + rs: rm, + }, + (true, false) => Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_f_to_x_op(ity), + rd: rd, + rs: rm, + }, + } +} + +impl Inst { + const INSTRUCTION_SIZE: i32 = 4; + + #[inline] + pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst { + Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd: rd, + rs: zero_reg(), + imm12: imm, + } + } + + /// Immediates can be loaded using lui and addi instructions.
+ fn load_const_imm(rd: Writable, value: u64) -> Option> { + Inst::generate_imm(value, |imm20, imm12| { + let mut insts = SmallVec::new(); + imm20.map(|x| insts.push(Inst::Lui { rd, imm: x })); + imm12.map(|x| { + let imm20_is_none = imm20.is_none(); + let rs = if imm20_is_none { + zero_reg() + } else { + rd.to_reg() + }; + insts.push(Inst::AluRRImm12 { + alu_op: AluOPRRI::Addi, + rd, + rs, + imm12: x, + }) + }); + + insts + }) + } + + pub(crate) fn load_constant_u32(rd: Writable, value: u64) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value); + insts.unwrap_or(LoadConstant::U32(value as u32).load_constant(rd)) + } + + pub fn load_constant_u64(rd: Writable, value: u64) -> SmallInstVec { + let insts = Inst::load_const_imm(rd, value); + insts.unwrap_or(LoadConstant::U64(value).load_constant(rd)) + } + + pub(crate) fn construct_auipc_and_jalr( + link: Option>, + tmp: Writable, + offset: i64, + ) -> [Inst; 2] { + Inst::generate_imm(offset as u64, |imm20, imm12| { + let a = Inst::Auipc { + rd: tmp, + imm: imm20.unwrap_or_default(), + }; + let b = Inst::Jalr { + rd: link.unwrap_or(writable_zero_reg()), + base: tmp.to_reg(), + offset: imm12.unwrap_or_default(), + }; + [a, b] + }) + .expect("code range is too big.") + } + + /// Create instructions that load a 32-bit floating-point constant. + pub fn load_fp_constant32( + rd: Writable, + const_data: u32, + tmp: Writable, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallVec::new(); + insts.extend(Self::load_constant_u32(tmp, const_data as u64)); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F32), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Create instructions that load a 64-bit floating-point constant. + pub fn load_fp_constant64( + rd: Writable, + const_data: u64, + tmp: WritableReg, + ) -> SmallVec<[Inst; 4]> { + let mut insts = SmallInstVec::new(); + insts.extend(Self::load_constant_u64(tmp, const_data)); + insts.push(Inst::FpuRR { + frm: None, + alu_op: FpuOPRR::move_x_to_f_op(F64), + rd, + rs: tmp.to_reg(), + }); + insts + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + Inst::Load { + rd: into_reg, + op: LoadOP::from_type(ty), + from: mem, + flags, + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + Inst::Store { + src: from_reg, + op: StoreOP::from_type(ty), + to: mem, + flags, + } + } +} + +//============================================================================= +fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { + match inst { + &Inst::Nop0 => {} + &Inst::Nop4 => {} + &Inst::BrTable { index, tmp1, .. } => { + collector.reg_use(index); + collector.reg_early_def(tmp1); + } + &Inst::BrTableCheck { index, .. } => { + collector.reg_use(index); + } + &Inst::Auipc { rd, .. } => collector.reg_def(rd), + &Inst::Lui { rd, .. } => collector.reg_def(rd), + &Inst::AluRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FpuRRR { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::AluRRImm12 { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::Load { rd, from, .. } => { + collector.reg_use(from.get_base_register()); + collector.reg_def(rd); + } + &Inst::Store { to, src, .. 
} => { + collector.reg_use(to.get_base_register()); + collector.reg_use(src); + } + + &Inst::Args { ref args } => { + for arg in args { + collector.reg_fixed_def(arg.vreg, arg.preg); + } + } + &Inst::Ret { ref rets } => { + collector.reg_uses(&rets[..]); + } + + &Inst::Extend { rd, rn, .. } => { + collector.reg_use(rn); + collector.reg_def(rd); + } + &Inst::AjustSp { .. } => {} + &Inst::Call { ref info } => { + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::CallInd { ref info } => { + collector.reg_use(info.rn); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); + } + &Inst::TrapIf { test, .. } => { + collector.reg_use(test); + } + &Inst::TrapFf { x, y, tmp, .. } => { + collector.reg_use(x); + collector.reg_use(y); + collector.reg_early_def(tmp); + } + + &Inst::Jal { .. } => {} + &Inst::CondBr { kind, .. } => { + collector.reg_use(kind.rs1); + collector.reg_use(kind.rs2); + } + &Inst::LoadExtName { rd, .. } => { + collector.reg_def(rd); + } + &Inst::LoadAddr { rd, mem } => { + collector.reg_use(mem.get_base_register()); + collector.reg_early_def(rd); + } + + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::Mov { rd, rm, .. } => { + collector.reg_use(rm); + collector.reg_def(rd); + } + &Inst::Fence { .. } => {} + &Inst::FenceI => {} + &Inst::ECall => {} + &Inst::EBreak => {} + &Inst::Udf { .. } => {} + &Inst::FpuRR { rd, rs, .. } => { + collector.reg_use(rs); + collector.reg_def(rd); + } + &Inst::FpuRRRR { + rd, rs1, rs2, rs3, .. + } => { + collector.reg_uses(&[rs1, rs2, rs3]); + collector.reg_def(rd); + } + + &Inst::Jalr { rd, base, .. } => { + collector.reg_use(base); + collector.reg_def(rd); + } + &Inst::Atomic { rd, addr, src, .. } => { + collector.reg_use(addr); + collector.reg_use(src); + collector.reg_def(rd); + } + &Inst::Fcmp { rd, rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_early_def(rd); + } + &Inst::Select { + ref dst, + condition, + x, + y, + .. + } => { + collector.reg_use(condition); + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + collector.reg_defs(&dst[..]); + } + &Inst::ReferenceCheck { rd, x, .. } => { + collector.reg_use(x); + collector.reg_def(rd); + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + .. + } => { + collector.reg_uses(&[offset, e, addr, v]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::IntSelect { + ref dst, + ref x, + ref y, + .. + } => { + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + collector.reg_defs(&dst[..]); + } + + &Inst::Csr { rd, rs, .. } => { + if let Some(rs) = rs { + collector.reg_use(rs); + } + collector.reg_def(rd); + } + + &Inst::Icmp { rd, a, b, .. } => { + collector.reg_uses(a.regs()); + collector.reg_uses(b.regs()); + collector.reg_def(rd); + } + + &Inst::SelectReg { + rd, + rs1, + rs2, + condition, + } => { + collector.reg_use(condition.rs1); + collector.reg_use(condition.rs2); + collector.reg_use(rs1); + collector.reg_use(rs2); + collector.reg_def(rd); + } + &Inst::FcvtToInt { rd, rs, tmp, .. } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_def(rd); + } + &Inst::SelectIf { + ref rd, + test, + ref x, + ref y, + .. 
+ } => { + collector.reg_use(test); + collector.reg_uses(x.regs()); + collector.reg_uses(y.regs()); + rd.iter().for_each(|r| collector.reg_def(*r)); + } + &Inst::RawData { .. } => {} + &Inst::AtomicStore { src, p, .. } => { + collector.reg_use(src); + collector.reg_use(p); + } + &Inst::AtomicLoad { rd, p, .. } => { + collector.reg_use(p); + collector.reg_def(rd); + } + &Inst::AtomicRmwLoop { + offset, + dst, + p, + x, + t0, + .. + } => { + collector.reg_uses(&[offset, p, x]); + collector.reg_early_def(t0); + collector.reg_early_def(dst); + } + &Inst::TrapIfC { rs1, rs2, .. } => { + collector.reg_use(rs1); + collector.reg_use(rs2); + } + &Inst::Unwind { .. } => {} + &Inst::DummyUse { reg } => { + collector.reg_use(reg); + } + &Inst::FloatRound { + rd, + int_tmp, + f_tmp, + rs, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(int_tmp); + collector.reg_early_def(f_tmp); + collector.reg_early_def(rd); + } + &Inst::FloatSelect { + rd, tmp, rs1, rs2, .. + } => { + collector.reg_uses(&[rs1, rs2]); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::FloatSelectPseudo { + rd, tmp, rs1, rs2, .. + } => { + collector.reg_uses(&[rs1, rs2]); + collector.reg_early_def(tmp); + collector.reg_early_def(rd); + } + &Inst::Popcnt { + sum, step, rs, tmp, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Rev8 { rs, rd, tmp, step } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(rd); + } + &Inst::Cltz { + sum, step, tmp, rs, .. + } => { + collector.reg_use(rs); + collector.reg_early_def(tmp); + collector.reg_early_def(step); + collector.reg_early_def(sum); + } + &Inst::Brev8 { + rs, + rd, + step, + tmp, + tmp2, + .. + } => { + collector.reg_use(rs); + collector.reg_early_def(step); + collector.reg_early_def(tmp); + collector.reg_early_def(tmp2); + collector.reg_early_def(rd); + } + &Inst::StackProbeLoop { .. } => { + // StackProbeLoop has a tmp register, but it is only generated by gen_prologue, + // which runs at the emit stage, so the register allocator never needs to know + // about it. t3 does the job: it is a caller-saved register that the compiler + // does not otherwise use directly (unlike writable_spilltmp_reg). + } + } +} + +impl MachInst for Inst { + type LabelUse = LabelUse; + type ABIMachineSpec = Riscv64MachineDeps; + + fn gen_dummy_use(reg: Reg) -> Self { + Inst::DummyUse { reg } + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::TrapIf { .. } + | &Inst::Udf { .. } => true, + _ => false, + } + } + + fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { + riscv64_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { + match self { + Inst::Mov { rd, rm, .. } => Some((rd.clone(), rm.clone())), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + true + } + + fn is_args(&self) -> bool { + match self { + Self::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self { + &Inst::Jal { .. } => MachTerminator::Uncond, + &Inst::CondBr { .. } => MachTerminator::Cond, + &Inst::Jalr { .. } => MachTerminator::Uncond, + &Inst::Ret { ..
} => MachTerminator::Ret, + // BrTableCheck is a bounds check performed before a BrTable; + // it can transfer control to `default_`. + &Inst::BrTable { .. } | &Inst::BrTableCheck { .. } => MachTerminator::Indirect, + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst { + let x = Inst::Mov { + rd: to_reg, + rm: from_reg, + ty, + }; + x + } + + fn gen_constant<F: FnMut(Type) -> Writable<Reg>>( + to_regs: ValueRegs<Writable<Reg>>, + mut value: u128, + ty: Type, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + if ty.is_bool() && value != 0 { + value = !0; + } + if (ty.bits() <= 64 && (ty.is_bool() || ty.is_int())) || ty == R32 || ty == R64 { + return Inst::load_constant_u64(to_regs.only_reg().unwrap(), value as u64); + }; + match ty { + F32 => { + Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, alloc_tmp(I64)) + } + F64 => { + Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, alloc_tmp(I64)) + } + I128 | B128 => { + let mut insts = SmallInstVec::new(); + insts.extend(Inst::load_constant_u64( + to_regs.regs()[0], + (value >> 64) as u64, + )); + insts.extend(Inst::load_constant_u64(to_regs.regs()[1], value as u64)); + return insts; + } + _ => unreachable!("vector types are not implemented yet."), + } + } + + fn gen_nop(preferred_size: usize) -> Inst { + if preferred_size == 0 { + return Inst::Nop0; + } + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + B1 => Ok((&[RegClass::Int], &[B1])), + B8 => Ok((&[RegClass::Int], &[B8])), + B16 => Ok((&[RegClass::Int], &[B16])), + B32 => Ok((&[RegClass::Int], &[B32])), + B64 => Ok((&[RegClass::Int], &[B64])), + R32 => panic!("32-bit reftype pointer should never be seen on riscv64"), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])), + IFLAGS => Ok((&[RegClass::Int], &[IFLAGS])), + FFLAGS => Ok((&[RegClass::Int], &[FFLAGS])), + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jal { + dest: BranchTarget::Label(target), + } + } + + fn worst_case_size() -> CodeOffset { + // Calculated by the test function riscv64_worst_case_instruction_size(). + 100 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } +} + +//============================================================================= +// Pretty-printing of instructions.
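One detail of `gen_constant` above worth calling out: an `I128`/`B128` value occupies a pair of integer registers (see `rc_for_type`), so the constant is materialized as two independent 64-bit loads. A standalone sketch of the halving (illustrative only; the high/low split mirrors the `(value >> 64)` / `value as u64` usage in the patch):

```rust
/// Split a 128-bit constant into the two 64-bit halves that are loaded
/// into the register pair backing an I128 value.
fn split_u128(v: u128) -> (u64, u64) {
    ((v >> 64) as u64, v as u64) // (upper half, lower half)
}

fn main() {
    let (hi, lo) = split_u128(0x0123_4567_89ab_cdef_fedc_ba98_7654_3210);
    assert_eq!(hi, 0x0123_4567_89ab_cdef);
    assert_eq!(lo, 0xfedc_ba98_7654_3210);
}
```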
+pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => match real.class() { + RegClass::Int => match real.hw_enc() { + 0 => "zero".into(), + 1 => "ra".into(), + 2 => "sp".into(), + 3 => "gp".into(), + 4 => "tp".into(), + 5 => "t0".into(), + 6..=7 => format!("t{}", real.hw_enc() - 5), + 8 => "fp".into(), + 9 => "s1".into(), + 10..=17 => format!("a{}", real.hw_enc() - 10), + 18..=27 => format!("s{}", real.hw_enc() - 16), + 28..=31 => format!("t{}", real.hw_enc() - 25), + _ => unreachable!(), + }, + RegClass::Float => match real.hw_enc() { + 0..=7 => format!("ft{}", real.hw_enc() - 0), + 8..=9 => format!("fs{}", real.hw_enc() - 8), + 10..=17 => format!("fa{}", real.hw_enc() - 10), + 18..=27 => format!("fs{}", real.hw_enc() - 16), + 28..=31 => format!("ft{}", real.hw_enc() - 20), + _ => unreachable!(), + }, + }, + None => { + format!("{:?}", reg) + } + } +} + +impl Inst { + fn print_with_state( + &self, + _state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + let format_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String { + let reg = allocs.next(reg); + reg_name(reg) + }; + + let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String { + let mut x = if regs.len() > 1 { + String::from("[") + } else { + String::default() + }; + regs.iter().for_each(|i| { + x.push_str(format_reg(i.clone(), allocs).as_str()); + if *i != *regs.last().unwrap() { + x.push_str(","); + } + }); + if regs.len() > 1 { + x.push_str("]"); + } + x + }; + let format_labels = |labels: &[MachLabel]| -> String { + if labels.len() == 0 { + return String::from("[_]"); + } + let mut x = String::from("["); + labels.iter().for_each(|l| { + x.push_str( + format!( + "{:?}{}", + l, + if l != labels.last().unwrap() { "," } else { "" }, + ) + .as_str(), + ); + }); + x.push_str("]"); + x + }; + + fn format_extend_op(signed: bool, from_bits: u8, _to_bits: u8) -> String { + let type_name = match from_bits { + 1 => "b1", + 8 => "b", + 16 => "h", + 32 => "w", + _ => unreachable!("from_bits:{:?}", from_bits), + }; + format!("{}ext.{}", if signed { "s" } else { "u" }, type_name) + } + fn format_frm(rounding_mode: Option) -> String { + if let Some(r) = rounding_mode { + format!(",{}", r.to_static_str(),) + } else { + "".into() + } + } + match self { + &Inst::Nop0 => { + format!("##zero length nop") + } + &Inst::Nop4 => { + format!("##fixed 4-size nop") + } + &Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + } => { + let tmp = format_reg(tmp.to_reg(), allocs); + format!( + "inline_stack_probe##guard_size={} probe_count={} tmp={}", + guard_size, probe_count, tmp + ) + } + &Inst::FloatRound { + op, + rd, + int_tmp, + f_tmp, + rs, + ty, + } => { + let rs = format_reg(rs, allocs); + let int_tmp = format_reg(int_tmp.to_reg(), allocs); + let f_tmp = format_reg(f_tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{}##int_tmp={} f_tmp={} ty={}", + op.op_name(), + rd, + rs, + int_tmp, + f_tmp, + ty + ) + } + &Inst::FloatSelectPseudo { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{}.pseudo {},{},{}##tmp={} ty={}", + op.op_name(), + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + tmp, + ty + ) + } + &Inst::FloatSelect { + op, + rd, + tmp, + rs1, + rs2, + ty, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, 
allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{} {},{},{}##tmp={} ty={}", + op.op_name(), + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + tmp, + ty + ) + } + &Inst::AtomicStore { src, ty, p } => { + let src = format_reg(src, allocs); + let p = format_reg(p, allocs); + format!("atomic_store.{} {},({})", ty, src, p) + } + &Inst::DummyUse { reg } => { + let reg = format_reg(reg, allocs); + format!("dummy_use {}", reg) + } + + &Inst::AtomicLoad { rd, ty, p } => { + let p = format_reg(p, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("atomic_load.{} {},({})", ty, rd, p) + } + &Inst::AtomicRmwLoop { + offset, + op, + dst, + ty, + p, + x, + t0, + } => { + let offset = format_reg(offset, allocs); + let p = format_reg(p, allocs); + let x = format_reg(x, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_rmw.{} {} {},{},({})##t0={} offset={}", + ty, op, dst, x, p, t0, offset + ) + } + + &Inst::RawData { ref data } => match data.len() { + 4 => { + let mut bytes = [0; 4]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".4byte 0x{:x}", u32::from_le_bytes(bytes)) + } + 8 => { + let mut bytes = [0; 8]; + for i in 0..bytes.len() { + bytes[i] = data[i]; + } + format!(".8byte 0x{:x}", u64::from_le_bytes(bytes)) + } + _ => { + format!(".data {:?}", data) + } + }, + &Inst::Unwind { ref inst } => { + format!("unwind {:?}", inst) + } + &Inst::Brev8 { + rs, + ty, + step, + tmp, + tmp2, + rd, + } => { + let rs = format_reg(rs, allocs); + let step = format_reg(step.to_reg(), allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let tmp2 = format_reg(tmp2.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "brev8 {},{}##tmp={} tmp2={} step={} ty={}", + rd, rs, tmp, tmp2, step, ty + ) + } + &Inst::SelectIf { + if_spectre_guard, + ref rd, + test, + ref x, + ref y, + } => { + let test = format_reg(test, allocs); + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let rd: Vec<_> = rd.iter().map(|r| r.to_reg()).collect(); + let rd = format_regs(&rd[..], allocs); + format!( + "selectif{} {},{},{}##test={}", + if if_spectre_guard { + "_spectre_guard" + } else { + "" + }, + rd, + x, + y, + test + ) + } + &Inst::Popcnt { + sum, + step, + rs, + tmp, + ty, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step) + } + &Inst::Rev8 { rs, rd, tmp, step } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp) + } + &Inst::Cltz { + sum, + step, + rs, + tmp, + ty, + leading, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let step = format_reg(step.to_reg(), allocs); + let sum = format_reg(sum.to_reg(), allocs); + format!( + "{} {},{}##ty={} tmp={} step={}", + if leading { "clz" } else { "ctz" }, + sum, + rs, + ty, + tmp, + step + ) + } + &Inst::FcvtToInt { + is_sat, + rd, + rs, + is_signed, + in_type, + out_type, + tmp, + } => { + let rs = format_reg(rs, allocs); + let tmp = format_reg(tmp.to_reg(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + 
"fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}", + if is_signed { "s" } else { "u" }, + if is_sat { "_sat" } else { "" }, + out_type, + rd, + rs, + in_type, + tmp + ) + } + &Inst::SelectReg { + rd, + rs1, + rs2, + ref condition, + } => { + let c_rs1 = format_reg(condition.rs1, allocs); + let c_rs2 = format_reg(condition.rs2, allocs); + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "select_reg {},{},{}##condition={}", + rd, + rs1, + rs2, + format!("({} {} {})", c_rs1, condition.kind.to_static_str(), c_rs2), + ) + } + &Inst::AtomicCas { + offset, + t0, + dst, + e, + addr, + v, + ty, + } => { + let offset = format_reg(offset, allocs); + let e = format_reg(e, allocs); + let addr = format_reg(addr, allocs); + let v = format_reg(v, allocs); + let t0 = format_reg(t0.to_reg(), allocs); + let dst = format_reg(dst.to_reg(), allocs); + format!( + "atomic_cas.{} {},{},{},({})##t0={} offset={}", + ty, dst, e, v, addr, t0, offset, + ) + } + &Inst::Icmp { cc, rd, a, b, ty } => { + let a = format_regs(a.regs(), allocs); + let b = format_regs(b.regs(), allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty) + } + &Inst::IntSelect { + op, + ref dst, + x, + y, + ty, + } => { + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("{} {},{},{}##ty={}", op.op_name(), dst, x, y, ty,) + } + &Inst::BrTableCheck { + index, + targets_len, + default_, + } => { + let index = format_reg(index, allocs); + format!( + "br_table_check {}##targets_len={} default_={}", + index, targets_len, default_ + ) + } + &Inst::BrTable { + index, + tmp1, + ref targets, + } => { + let targets: Vec<_> = targets.iter().map(|x| x.as_label().unwrap()).collect(); + format!( + "{} {},{}##tmp1={}", + "br_table", + format_reg(index, allocs), + format_labels(&targets[..]), + format_reg(tmp1.to_reg(), allocs), + ) + } + &Inst::Auipc { rd, imm } => { + format!( + "{} {},{}", + "auipc", + format_reg(rd.to_reg(), allocs), + imm.bits + ) + } + + &Inst::ReferenceCheck { rd, op, x } => { + let x = format_reg(x, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}", op.op_name(), rd, x) + } + &Inst::Jalr { rd, base, offset } => { + let base = format_reg(base, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}({})", "jalr", rd, offset.bits, base) + } + &Inst::Lui { rd, ref imm } => { + format!("{} {},{}", "lui", format_reg(rd.to_reg(), allocs), imm.bits) + } + + &Inst::AluRRR { + alu_op, + rd, + rs1, + rs2, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{},{}", alu_op.op_name(), rd, rs1, rs2,) + } + &Inst::FpuRR { + frm, + alu_op, + rd, + rs, + } => { + let rs = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm)) + } + &Inst::FpuRRR { + alu_op, + rd, + rs1, + rs2, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + let rs1_is_rs2 = rs1 == rs2; + if rs1_is_rs2 && alu_op.is_copy_sign() { + // this is move instruction. 
+ format!( + "fmv.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_neg_sign() { + format!( + "fneg.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else if rs1_is_rs2 && alu_op.is_copy_xor_sign() { + format!( + "fabs.{} {},{}", + if alu_op.is_32() { "s" } else { "d" }, + rd, + rs1 + ) + } else { + format!( + "{} {},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + format_frm(frm) + ) + } + } + &Inst::Csr { + csr_op, + rd, + rs, + imm, + csr, + } => { + let rs = rs.map_or("".into(), |r| format_reg(r, allocs)); + let rd = format_reg(rd.to_reg(), allocs); + if csr_op.need_rs() { + format!("{} {},{},{}", csr_op.op_name(), rd, csr, rs) + } else { + format!("{} {},{},{}", csr_op.op_name(), rd, csr, imm.unwrap()) + } + } + &Inst::FpuRRRR { + alu_op, + rd, + rs1, + rs2, + rs3, + frm, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rs3 = format_reg(rs3, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{},{},{}{}", + alu_op.op_name(), + rd, + rs1, + rs2, + rs3, + format_frm(frm) + ) + } + &Inst::AluRRImm12 { + alu_op, + rd, + rs, + ref imm12, + } => { + let rs_s = format_reg(rs, allocs); + let rd = format_reg(rd.to_reg(), allocs); + // check if it is a load constant. + if alu_op == AluOPRRI::Addi && rs == zero_reg() { + format!("li {},{}", rd, imm12.as_i16()) + } else if alu_op == AluOPRRI::Xori && imm12.as_i16() == -1 { + format!("not {},{}", rd, rs_s) + } else { + if alu_op.option_funct12().is_some() { + format!("{} {},{}", alu_op.op_name(), rd, rs_s) + } else { + format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16()) + } + } + } + &Inst::Load { + rd, + op, + from, + flags: _flags, + } => { + let base = from.to_string_with_alloc(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("{} {},{}", op.op_name(), rd, base,) + } + &Inst::Fcmp { + rd, + cc, + ty, + rs1, + rs2, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!( + "f{}.{} {},{},{}", + cc, + if ty == F32 { "s" } else { "d" }, + rd, + rs1, + rs2, + ) + } + &Inst::Store { + to, + src, + op, + flags: _flags, + } => { + let base = to.to_string_with_alloc(allocs); + let src = format_reg(src, allocs); + format!("{} {},{}", op.op_name(), src, base,) + } + &Inst::Args { ref args } => { + let mut s = "args".to_string(); + let mut empty_allocs = AllocationConsumer::default(); + for arg in args { + use std::fmt::Write; + let preg = format_reg(arg.preg, &mut empty_allocs); + let def = format_reg(arg.vreg.to_reg(), allocs); + write!(&mut s, " {}={}", def, preg).unwrap(); + } + s + } + &Inst::Ret { .. 
} => { + format!("ret") + } + + &MInst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let rn = format_reg(rn, allocs); + let rm = format_reg(rd.to_reg(), allocs); + format!( + "{} {},{}", + format_extend_op(signed, from_bits, to_bits), + rm, + rn + ) + } + &MInst::AjustSp { amount } => { + format!("{} sp,{:+}", "add", amount) + } + &MInst::Call { ref info } => format!("call {}", info.dest.display(None)), + &MInst::CallInd { ref info } => { + let rd = format_reg(info.rn, allocs); + format!("callind {}", rd) + } + &MInst::TrapIf { test, trap_code } => { + format!("trap_if {},{}", format_reg(test, allocs), trap_code,) + } + &MInst::TrapIfC { + rs1, + rs2, + cc, + trap_code, + } => { + let rs1 = format_reg(rs1, allocs); + let rs2 = format_reg(rs2, allocs); + format!("trap_ifc {}##({} {} {})", trap_code, rs1, cc, rs2) + } + &MInst::TrapFf { + cc, + x, + y, + ty, + trap_code, + tmp, + } => format!( + "trap_ff_{} {} {},{}##tmp={} ty={}", + cc, + trap_code, + format_reg(x, allocs), + format_reg(y, allocs), + format_reg(tmp.to_reg(), allocs), + ty, + ), + &MInst::Jal { dest, .. } => { + format!("{} {}", "j", dest) + } + &MInst::CondBr { + taken, + not_taken, + kind, + .. + } => { + let rs1 = format_reg(kind.rs1, allocs); + let rs2 = format_reg(kind.rs2, allocs); + if not_taken.is_zero() && taken.as_label().is_none() { + let off = taken.as_offset().unwrap(); + format!("{} {},{},{}", kind.op_name(), rs1, rs2, off) + } else { + let x = format!( + "{} {},{},taken({}),not_taken({})", + kind.op_name(), + rs1, + rs2, + taken, + not_taken + ); + x + } + } + &MInst::Atomic { + op, + rd, + addr, + src, + amo, + } => { + let op_name = op.op_name(amo); + let addr = format_reg(addr, allocs); + let src = format_reg(src, allocs); + let rd = format_reg(rd.to_reg(), allocs); + if op.is_load() { + format!("{} {},({})", op_name, rd, addr) + } else { + format!("{} {},{},({})", op_name, rd, src, addr) + } + } + &MInst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = format_reg(rd.to_reg(), allocs); + format!("load_sym {},{}{:+}", rd, name.display(None), offset) + } + &MInst::LoadAddr { ref rd, ref mem } => { + let rs = mem.to_addr(allocs); + let rd = format_reg(rd.to_reg(), allocs); + format!("load_addr {},{}", rd, rs) + } + &MInst::VirtualSPOffsetAdj { amount } => { + format!("virtual_sp_offset_adj {:+}", amount) + } + &MInst::Mov { rd, rm, ty } => { + let rd = format_reg(rd.to_reg(), allocs); + let rm = format_reg(rm, allocs); + let v = if ty == F32 { + "fmv.s" + } else if ty == F64 { + "fmv.d" + } else { + "mv" + }; + format!("{} {},{}", v, rd, rm) + } + &MInst::Fence { pred, succ } => { + format!( + "fence {},{}", + Inst::fence_req_to_string(pred), + Inst::fence_req_to_string(succ), + ) + } + &MInst::FenceI => "fence.i".into(), + &MInst::Select { + ref dst, + condition, + ref x, + ref y, + ty, + } => { + let condition = format_reg(condition, allocs); + let x = format_regs(x.regs(), allocs); + let y = format_regs(y.regs(), allocs); + let dst: Vec<_> = dst.clone().into_iter().map(|r| r.to_reg()).collect(); + let dst = format_regs(&dst[..], allocs); + format!("select_{} {},{},{}##condition={}", ty, dst, x, y, condition) + } + &MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code), + &MInst::EBreak {} => String::from("ebreak"), + &MInst::ECall {} => String::from("ecall"), + } + } +} + +/// Different forms of label references for different instruction formats. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 20-bit branch offset (unconditional branches). PC-rel, offset is + /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions. + Jal20, + + /// The unconditional jump instructions all use PC-relative + /// addressing to help support position independent code. The JALR + /// instruction was defined to enable a two-instruction sequence to + /// jump anywhere in a 32-bit absolute address range. A LUI + /// instruction can first load rs1 with the upper 20 bits of a + /// target address, then JALR can add in the lower bits. Similarly, + /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative + /// address range. + PCRel32, + + /// All branch instructions use the B-type instruction format. The + /// 12-bit B-immediate encodes signed offsets in multiples of 2, and + /// is added to the current pc to give the target address. The + /// conditional branch range is ±4 KiB. + B12, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every Riscv64 instruction must be + /// 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + LabelUse::Jal20 => ((1 << 19) - 1) * 2, + LabelUse::PCRel32 => Inst::imm_max() as CodeOffset, + LabelUse::B12 => ((1 << 11) - 1) * 2, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset, + _ => self.max_pos_range() + 2, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + LabelUse::Jal20 => 4, + LabelUse::PCRel32 => 8, + LabelUse::B12 => 4, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + assert!(use_offset % 4 == 0); + assert!(label_offset % 4 == 0); + let offset = (label_offset as i64) - (use_offset as i64); + + // re-check range + assert!( + offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64), + "{:?} offset '{}' use_offset:'{}' label_offset:'{}' must not exceed max range.", + self, + offset, + use_offset, + label_offset, + ); + self.patch_raw_offset(buffer, offset); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::B12 => true, + Self::Jal20 => true, + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::B12 => 8, + Self::Jal20 => 8, + _ => unreachable!(), + } + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
+ fn generate_veneer( + self, + buffer: &mut [u8], + veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + let base = writable_spilltmp_reg(); + { + let x = enc_auipc(base, Imm20::from_bits(0)).to_le_bytes(); + buffer[0] = x[0]; + buffer[1] = x[1]; + buffer[2] = x[2]; + buffer[3] = x[3]; + } + { + let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::from_bits(0)).to_le_bytes(); + buffer[4] = x[0]; + buffer[5] = x[1]; + buffer[6] = x[2]; + buffer[7] = x[3]; + } + (veneer_offset, Self::PCRel32) + } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> { + match (reloc, addend) { + (Reloc::RiscvCall, _) => Some(Self::PCRel32), + _ => None, + } + } +} + +impl LabelUse { + fn offset_in_range(self, offset: i64) -> bool { + let min = -(self.max_neg_range() as i64); + let max = self.max_pos_range() as i64; + offset >= min && offset <= max + } + + fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) { + let insn = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + match self { + LabelUse::Jal20 => { + let offset = offset as u32; + let v = ((offset >> 12 & 0b1111_1111) << 12) + | ((offset >> 11 & 0b1) << 20) + | ((offset >> 1 & 0b11_1111_1111) << 21) + | ((offset >> 20 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + LabelUse::PCRel32 => { + let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]); + Inst::generate_imm(offset as u64, |imm20, imm12| { + let imm20 = imm20.unwrap_or_default(); + let imm12 = imm12.unwrap_or_default(); + // Encode the OR-ed-in value with zero_reg(). The + // register parameter must be in the original + // encoded instruction and or'ing in zeroes does not + // change it. + buffer[0..4].clone_from_slice(&u32::to_le_bytes( + insn | enc_auipc(writable_zero_reg(), imm20), + )); + buffer[4..8].clone_from_slice(&u32::to_le_bytes( + insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12), + )); + }) + // The `expect` is safe: the range was already checked. + .expect("the range was checked before patching; this is a compiler error."); + } + + LabelUse::B12 => { + let offset = offset as u32; + let v = ((offset >> 11 & 0b1) << 7) + | ((offset >> 1 & 0b1111) << 8) + | ((offset >> 5 & 0b11_1111) << 25) + | ((offset >> 12 & 0b1) << 31); + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v)); + } + } + } +} + +pub(crate) fn overflow_already_lowerd() -> ! { + unreachable!("overflow and nof should have been lowered at an earlier phase.") +} +#[cfg(test)] +mod test { + use super::*; + #[test] + fn label_use_max_range() { + assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2); + assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2); + assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset)); + assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset)); + assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2); + } +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/regs.rs b/cranelift/codegen/src/isa/riscv64/inst/regs.rs new file mode 100644 index 000000000000..35cef328c2c1 --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/inst/regs.rs @@ -0,0 +1,220 @@ +//! Riscv64 ISA definitions: registers. +//!
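Before the register definitions, a quick illustration of the `Jal20` patching logic above: a `jal` byte offset is not stored contiguously but scattered across the J-type immediate fields. A standalone encoder mirroring `patch_raw_offset` (a sketch for illustration, not code from this patch):

```rust
/// Encode `jal rd, offset` by scattering the offset into the J-type
/// immediate fields, as `patch_raw_offset` does for `LabelUse::Jal20`.
fn encode_jal(rd: u32, offset: i32) -> u32 {
    assert!(offset % 2 == 0, "J-type offsets are multiples of 2");
    let off = offset as u32;
    0b110_1111 // jal opcode
        | rd << 7
        | (off >> 12 & 0xff) << 12 // imm[19:12]
        | (off >> 11 & 0x1) << 20 // imm[11]
        | (off >> 1 & 0x3ff) << 21 // imm[10:1]
        | (off >> 20 & 0x1) << 31 // imm[20]
}

fn main() {
    // `jal zero, +2048`: only imm[11] is set.
    assert_eq!(encode_jal(0, 2048), 0x0010_006f);
    // The ~±1 MiB reach asserted by `label_use_max_range` above.
    assert_eq!(((1 << 19) - 1) * 2, 1_048_574);
}
```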
+ +use crate::settings; + +use crate::machinst::{Reg, Writable}; + +use crate::machinst::RealReg; +use alloc::vec; +use alloc::vec::Vec; + +use regalloc2::VReg; +use regalloc2::{MachineEnv, PReg, RegClass}; + +// first argument of function call +#[inline] +pub fn a0() -> Reg { + x_reg(10) +} + +// second argument of function call +#[inline] +pub fn a1() -> Reg { + x_reg(11) +} + +// third argument of function call +#[inline] +pub fn a2() -> Reg { + x_reg(12) +} + +#[inline] +pub fn writable_a0() -> Writable { + Writable::from_reg(a0()) +} +#[inline] +pub fn writable_a1() -> Writable { + Writable::from_reg(a1()) +} +#[inline] +pub fn writable_a2() -> Writable { + Writable::from_reg(a2()) +} + +#[inline] +pub fn fa0() -> Reg { + f_reg(10) +} +#[inline] +pub fn writable_fa0() -> Writable { + Writable::from_reg(fa0()) +} +#[inline] +pub fn writable_fa1() -> Writable { + Writable::from_reg(fa1()) +} +#[inline] +pub fn fa1() -> Reg { + f_reg(11) +} + +#[inline] +pub fn fa7() -> Reg { + f_reg(17) +} + +/// Get a reference to the zero-register. +#[inline] +pub fn zero_reg() -> Reg { + x_reg(0) +} + +/// Get a writable reference to the zero-register (this discards a result). +#[inline] +pub fn writable_zero_reg() -> Writable { + Writable::from_reg(zero_reg()) +} +#[inline] +pub fn stack_reg() -> Reg { + x_reg(2) +} + +/// Get a writable reference to the stack-pointer register. +#[inline] +pub fn writable_stack_reg() -> Writable { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x1). +pub fn link_reg() -> Reg { + x_reg(1) +} + +/// Get a writable reference to the link register. +#[inline] +pub fn writable_link_reg() -> Writable { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the frame pointer (x29). +#[inline] +pub fn fp_reg() -> Reg { + x_reg(8) +} + +/// Get a writable reference to the frame pointer. +#[inline] +pub fn writable_fp_reg() -> Writable { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", +/// register. This register is used in various ways as a temporary. +#[inline] +pub fn spilltmp_reg() -> Reg { + x_reg(31) +} + +/// Get a writable reference to the spilltmp reg. +#[inline] +pub fn writable_spilltmp_reg() -> Writable { + Writable::from_reg(spilltmp_reg()) +} + +///spilltmp2 +#[inline] +pub fn spilltmp_reg2() -> Reg { + x_reg(30) +} + +/// Get a writable reference to the spilltmp2 reg. 
+#[inline]
+pub fn writable_spilltmp_reg2() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg2())
+}
+
+pub fn crate_reg_eviroment(_flags: &settings::Flags) -> MachineEnv {
+    // Preferred registers are the caller-saved ones: handing them out first
+    // avoids prologue spills of callee-saved registers in small functions.
+    let preferred_regs_by_class: [Vec<PReg>; 2] = {
+        let mut x_register: Vec<PReg> = vec![];
+        // t0..t2 (x5..x7).
+        x_register.push(PReg::new(5, RegClass::Int));
+        for i in 6..=7 {
+            x_register.push(PReg::new(i, RegClass::Int));
+        }
+        // a0..a7 (x10..x17).
+        for i in 10..=17 {
+            x_register.push(PReg::new(i, RegClass::Int));
+        }
+        // t3..t4 (x28..x29); x30 and x31 are reserved as spill temporaries.
+        for i in 28..=29 {
+            x_register.push(PReg::new(i, RegClass::Int));
+        }
+
+        let mut f_register: Vec<PReg> = vec![];
+        // ft0..ft7 (f0..f7).
+        for i in 0..=7 {
+            f_register.push(PReg::new(i, RegClass::Float));
+        }
+        // fa0..fa7 (f10..f17).
+        for i in 10..=17 {
+            f_register.push(PReg::new(i, RegClass::Float));
+        }
+        // ft8..ft11 (f28..f31).
+        for i in 28..=31 {
+            f_register.push(PReg::new(i, RegClass::Float));
+        }
+        [x_register, f_register]
+    };
+
+    // Non-preferred registers are the callee-saved ones.
+    let non_preferred_regs_by_class: [Vec<PReg>; 2] = {
+        let mut x_register: Vec<PReg> = vec![];
+        // s1 (x9) and s2..s11 (x18..x27).
+        x_register.push(PReg::new(9, RegClass::Int));
+        for i in 18..=27 {
+            x_register.push(PReg::new(i, RegClass::Int));
+        }
+        let mut f_register: Vec<PReg> = vec![];
+        // fs0..fs1 (f8..f9) and fs2..fs11 (f18..f27).
+        for i in 8..=9 {
+            f_register.push(PReg::new(i, RegClass::Float));
+        }
+        for i in 18..=27 {
+            f_register.push(PReg::new(i, RegClass::Float));
+        }
+        [x_register, f_register]
+    };
+
+    MachineEnv {
+        preferred_regs_by_class,
+        non_preferred_regs_by_class,
+        fixed_stack_slots: vec![],
+    }
+}
+
+#[inline]
+pub fn x_reg(enc: usize) -> Reg {
+    let p_reg = PReg::new(enc, RegClass::Int);
+    let v_reg = VReg::new(p_reg.index(), p_reg.class());
+    Reg::from(v_reg)
+}
+pub fn px_reg(enc: usize) -> PReg {
+    PReg::new(enc, RegClass::Int)
+}
+
+#[inline]
+pub fn f_reg(enc: usize) -> Reg {
+    let p_reg = PReg::new(enc, RegClass::Float);
+    let v_reg = VReg::new(p_reg.index(), p_reg.class());
+    Reg::from(v_reg)
+}
+pub const fn pf_reg(enc: usize) -> PReg {
+    PReg::new(enc, RegClass::Float)
+}
+#[inline]
+pub(crate) fn real_reg_to_reg(x: RealReg) -> Reg {
+    let v_reg = VReg::new(x.hw_enc() as usize, x.class());
+    Reg::from(v_reg)
+}
+
+#[allow(dead_code)]
+pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
+    let mut regs = vec![];
+    for i in start..=end {
+        regs.push(Writable::from_reg(x_reg(i)));
+    }
+    regs
+}
diff --git a/cranelift/codegen/src/isa/riscv64/inst/unwind.rs b/cranelift/codegen/src/isa/riscv64/inst/unwind.rs
new file mode 100644
index 000000000000..1e2bb904db74
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/inst/unwind.rs
@@ -0,0 +1,2 @@
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
diff --git a/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
new file mode 100644
index 000000000000..98ce93876db4
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/inst/unwind/systemv.rs
@@ -0,0 +1,173 @@
+//! Unwind information for System V ABI (Riscv64).
+
+use crate::isa::riscv64::inst::regs;
+use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::machinst::Reg;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
+use regalloc2::RegClass;
+
+/// Creates a new riscv64 common information entry (CIE).
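+/// The code alignment factor is 4 (this backend only emits 4-byte
+/// instructions), the data alignment factor is -8 (frame offsets are
+/// recorded in 8-byte units, growing downward), and the return-address
+/// column is `ra`.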
+pub fn create_cie() -> CommonInformationEntry {
+    use gimli::write::CallFrameInstruction;
+
+    let mut entry = CommonInformationEntry::new(
+        Encoding {
+            address_size: 8,
+            format: Format::Dwarf32,
+            version: 1,
+        },
+        4,  // Code alignment factor
+        -8, // Data alignment factor
+        Register(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16),
+    );
+
+    // Every frame will start with the call frame address (CFA) at SP.
+    let sp = Register(regs::stack_reg().to_real_reg().unwrap().hw_enc().into());
+    entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
+
+    entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+/// The RISC-V DWARF numbering assigns 0..=31 to the integer registers
+/// and 32..=63 to the floating-point registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+    match reg.class() {
+        RegClass::Int => {
+            let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
+            Ok(Register(reg))
+        }
+        RegClass::Float => {
+            let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
+            Ok(Register(32 + reg))
+        }
+    }
+}
+
+pub(crate) struct RegisterMapper;
+
+impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+    fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+        Ok(map_reg(reg)?.0)
+    }
+    fn sp(&self) -> u16 {
+        regs::stack_reg().to_real_reg().unwrap().hw_enc() as u16
+    }
+    fn fp(&self) -> Option<u16> {
+        Some(regs::fp_reg().to_real_reg().unwrap().hw_enc() as u16)
+    }
+    fn lr(&self) -> Option<u16> {
+        Some(regs::link_reg().to_real_reg().unwrap().hw_enc() as u16)
+    }
+    fn lr_offset(&self) -> Option<u32> {
+        Some(8)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::cursor::{Cursor, FuncCursor};
+
+    use crate::ir::{
+        types, AbiParam, Function, InstBuilder, Signature, StackSlotData, StackSlotKind,
+        UserFuncName,
+    };
+    use crate::isa::{lookup, CallConv};
+    use crate::settings::{builder, Flags};
+    use crate::Context;
+    use gimli::write::Address;
+    use std::str::FromStr;
+    use target_lexicon::triple;
+
+    #[test]
+    fn test_simple_func() {
+        let isa = lookup(triple!("riscv64"))
+            .expect("expect riscv64 ISA")
+            .finish(Flags::new(builder()))
+            .expect("Creating compiler backend");
+
+        let mut context = Context::for_function(create_function(
+            CallConv::SystemV,
+            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+        ));
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let fde = match context
+            .create_unwind_info(isa.as_ref())
+            .expect("can create unwind info")
+        {
+            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+                info.to_fde(Address::Constant(1234))
+            }
+            _ => panic!("expected unwind information"),
+        };
+
+        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 40, lsda: None, instructions: [(12, CfaOffset(16)), (12, Offset(Register(8), -16)), (12, Offset(Register(1), -8)), (16, CfaRegister(Register(8)))] }");
+    }
+
+    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+        let mut func =
+            Function::with_name_signature(UserFuncName::user(0, 0), Signature::new(call_conv));
+
+        let block0 = func.dfg.make_block();
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+        pos.ins().return_(&[]);
+
+        if let Some(stack_slot) = stack_slot {
+            func.sized_stack_slots.push(stack_slot);
+        }
+
+        func
+    }
+
+    #[test]
+    fn test_multi_return_func() {
+        let isa = lookup(triple!("riscv64"))
+            .expect("expect riscv64 ISA")
+            .finish(Flags::new(builder()))
+            .expect("Creating compiler backend");
+
+        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let fde = match context
+            .create_unwind_info(isa.as_ref())
+            .expect("can create
unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!( + format!("{:?}", fde), + "FrameDescriptionEntry { address: Constant(4321), length: 12, lsda: None, instructions: [] }" + ); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(UserFuncName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brnz(v0, block2, &[]); + pos.ins().jump(block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle new file mode 100644 index 000000000000..4a344c6ae0fa --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -0,0 +1,983 @@ +;; riscv64 instruction selection and CLIF-to-MachInst lowering. + +;; The main lowering constructor term: takes a clif `Inst` and returns the +;; register(s) within which the lowered instruction's result values live. +(decl lower (Inst) InstOutput) + +;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (iconst (u64_from_imm64 n)))) + (imm ty n)) + +;;;; Rules for `bconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (bconst $false))) + (imm ty 0)) + +(rule (lower (has_type ty (bconst $true))) + (imm ty 1)) + + +;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (null))) + (imm ty 0)) + + +;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_32 ty) (iadd x y))) + (alu_rrr (AluOPRRR.Addw) x y)) + +;; Base case, simply adding things in registers. +(rule (lower (has_type (fits_in_64 ty) (iadd x y))) + (alu_add x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) + (alu_rr_imm12 (select_addi ty) x y)) + +(rule (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) + (alu_rr_imm12 (select_addi ty) y x)) + +(rule + (lower (has_type $I128 (iadd x y))) + (let + ( ;; low part. + (low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0))) + ;; compute carry. + (carry Reg(alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0))) + ;; + (high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1))) + ;; add carry. + (high Reg (alu_add high_tmp carry))) + (value_regs low high))) + +;;; Rules for `iadd_ifcout` ;;;;;;;;;;;;; +(rule + (lower (has_type (fits_in_64 ty) (iadd_ifcout x y))) + (output_ifcout (alu_add x y))) + + +;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Base case, simply subtracting things in registers. 
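+;; RV64 note: `sub` operates on the full 64-bit register, while `subw`
+;; computes the 32-bit difference and sign-extends it into the destination,
+;; keeping narrow results in canonical sign-extended form:
+;;
+;;   sub  a0, a1, a2   ;; a0 = a1 - a2           (64-bit)
+;;   subw a0, a1, a2   ;; a0 = sext32(a1 - a2)   (32-bit)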
+ +(rule (lower (has_type (fits_in_64 ty) (isub x y))) + (alu_rrr (AluOPRRR.Sub) x y)) + +(rule (lower (has_type (fits_in_32 ty) (isub x y))) + (alu_rrr (AluOPRRR.Subw) x y)) + +(rule (lower (has_type $I128 (isub x y))) + (i128_sub x y)) + +;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `i64` and smaller. +(rule (lower (has_type (fits_in_64 ty) (ineg x))) + (alu_rrr (AluOPRRR.Sub) (zero_reg) x)) + + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_64 ty) (imul x y))) + (alu_rrr (AluOPRRR.Mul) x y)) +(rule (lower (has_type (fits_in_32 ty) (imul x y))) + (alu_rrr (AluOPRRR.Mulw) x y)) + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (smulhi x y))) + (lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (umulhi x y))) + (lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +;; for I128 +(rule (lower (has_type $I128 (imul x y))) + (let + ((x_regs ValueRegs x) + (x_lo Reg (value_regs_get x_regs 0)) + (x_hi Reg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. + (y_regs ValueRegs y) + (y_lo Reg (value_regs_get y_regs 0)) + (y_hi Reg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; umulh dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 Reg (umulh x_lo y_lo)) + (dst_hi2 Reg (madd x_lo y_hi dst_hi1)) + (dst_hi Reg (madd x_hi y_lo dst_hi2)) + (dst_lo Reg (madd x_lo y_lo (zero_reg)))) + (value_regs dst_lo dst_hi))) + + +;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_32 ty) (udiv x y))) + (let + ((y2 Reg (ext_int_if_need $false y ty)) + (_ InstOutput (gen_div_by_zero y2))) + (alu_rrr (AluOPRRR.Divuw) (ext_int_if_need $false x ty) y2))) + +(rule (lower (has_type (fits_in_32 ty) (sdiv x y))) + (let + ((a Reg (ext_int_if_need $true x ty)) + (b Reg (ext_int_if_need $true y ty)) + (_ InstOutput (gen_div_overflow a b ty)) + (_ InstOutput (gen_div_by_zero b))) + (alu_rrr (AluOPRRR.Divw) a b))) + +(rule (lower (has_type $I64 (sdiv x y))) + (let + ((_ InstOutput (gen_div_overflow x y $I64)) + (_ InstOutput (gen_div_by_zero y)) ) + (alu_rrr (AluOPRRR.Div) x y))) + +(rule (lower (has_type $I64 (udiv x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (alu_rrr (AluOPRRR.DivU) x y))) + +;;;; Rules for `rem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type (fits_in_16 ty) (urem x y))) + (let + ((y2 Reg(ext_int_if_need $false y ty)) + (_ InstOutput (gen_div_by_zero y2))) + (alu_rrr (AluOPRRR.Remuw) (ext_int_if_need $false x ty) y2))) + +(rule (lower (has_type (fits_in_16 ty) (srem x y))) + (let + ((y2 Reg (ext_int_if_need $true y ty)) + (_ InstOutput (gen_div_by_zero y2))) + (alu_rrr (AluOPRRR.Remw) (ext_int_if_need $true x ty) y2))) + +(rule (lower (has_type $I32 (srem x y))) + (let + ((y2 Reg (ext_int_if_need $true y $I32)) + (_ InstOutput (gen_div_by_zero y2))) + (alu_rrr (AluOPRRR.Remw) x y2))) + +(rule (lower (has_type $I32 (urem x y))) + (let + ((y2 Reg (ext_int_if_need $false y $I32)) 
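+     ;; `gen_div_by_zero` emits a compare-and-trap (integer-division-by-zero
+     ;; trap code) on a zero divisor before the actual remainder instruction.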
+ (_ InstOutput (gen_div_by_zero y2))) + (alu_rrr (AluOPRRR.Remuw) x y2))) + +(rule (lower (has_type $I64 (srem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (alu_rrr (AluOPRRR.Rem) x y))) + +(rule (lower (has_type $I64 (urem x y))) + (let + ((_ InstOutput (gen_div_by_zero y))) + (alu_rrr (AluOPRRR.RemU) x y))) + +;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (band x y))) + (alu_rrr (AluOPRRR.And) x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule (lower (has_type (fits_in_64 ty) (band x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Andi) x y)) + +(rule (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y))) + (alu_rr_imm12 (AluOPRRI.Andi) y x)) + +(rule (lower (has_type $B128 (band x y))) + (lower_b128_binary (AluOPRRR.And) x y)) +(rule (lower (has_type $I128 (band x y))) + (lower_b128_binary (AluOPRRR.And) x y)) + +(rule (lower (has_type $F32 (band x y))) + (lower_float_binary (AluOPRRR.And) x y $F32)) +(rule (lower (has_type $F64 (band x y))) + (lower_float_binary (AluOPRRR.And) x y $F64)) + + +;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (bor x y))) + (alu_rrr (AluOPRRR.Or) x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. +(rule (lower (has_type (fits_in_64 ty) (bor x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Ori) x y)) + +(rule (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y))) + (alu_rr_imm12 (AluOPRRI.Ori) y x)) +(rule (lower (has_type $B128 (bor x y))) + (lower_b128_binary (AluOPRRR.Or) x y)) +(rule (lower (has_type $I128 (bor x y))) + (lower_b128_binary (AluOPRRR.Or) x y)) +(rule (lower (has_type $F32 (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y $F32)) +(rule (lower (has_type $F64 (bor x y))) + (lower_float_binary (AluOPRRR.Or) x y $F64)) + + +;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (bxor x y))) + (alu_rrr (AluOPRRR.Xor) x y)) + +;; Special cases for when one operand is an immediate that fits in 12 bits. 
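+;; (`xori`, like `andi`/`ori`, sign-extends its 12-bit immediate;
+;; `imm12_from_value` only matches constants representable that way.)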
+(rule (lower (has_type (fits_in_64 ty) (bxor x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Xori) x y)) + +(rule (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y))) + (alu_rr_imm12 (AluOPRRI.Xori) y x)) +(rule (lower (has_type $B128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) +(rule (lower (has_type $I128 (bxor x y))) + (lower_b128_binary (AluOPRRR.Xor) x y)) +(rule (lower (has_type $F32 (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y $F32)) +(rule (lower (has_type $F64 (bxor x y))) + (lower_float_binary (AluOPRRR.Xor) x y $F64)) + + +;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type fits_in_64 (bnot x))) + (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_neg_bits -1))) + +(rule (lower (has_type $I128 (bnot x))) + (bnot_128 x)) +(rule (lower (has_type $B128 (bnot x))) + (bnot_128 x)) +(rule + (lower (has_type $F32 (bnot x))) + (lower_float_bnot x $F32) +) +(rule + (lower (has_type $F64 (bnot x))) + (lower_float_bnot x $F64) +) + +;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type ty (bitrev x))) + (lower_bit_reverse x ty)) + +(rule (lower (has_type $I128 (bitrev x))) + (let ((val ValueRegs x) + (lo_rev Reg (lower_bit_reverse (value_regs_get val 0) $I64)) + (hi_rev Reg (lower_bit_reverse (value_regs_get val 1) $I64))) + (value_regs hi_rev lo_rev))) + + +;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type ty (ctz x))) + (lower_ctz ty x)) + +(rule (lower (has_type $I128 (ctz x))) + (lower_ctz_128 x)) + +;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type ty (clz x))) + (lower_clz ty x)) +(rule (lower (has_type $I128 (clz x))) + (lower_clz_i128 x)) + +;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out (uextend x @ (value_type in)))) + (lower_extend x $false (ty_bits in) (ty_bits out))) + +;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type out (sextend x @ (value_type in)))) + (lower_extend x $true (ty_bits in) (ty_bits out))) + + +;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (band_not x y))) + (gen_andn x y)) +(rule (lower (has_type $I128 (band_not x y))) + (let + ((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + +;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty) (popcnt x))) + (lower_popcnt x ty)) +(rule (lower (has_type $I128 (popcnt x))) + (lower_popcnt_i128 x)) + +;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $I8 (ishl x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sllw) x (alu_andi y 7)) +) +(rule (lower (has_type $I8(ishl x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Slliw) x (imm12_and y 7))) + +(rule (lower (has_type $I16 (ishl x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sllw) x (alu_andi y 15)) +) +(rule (lower (has_type $I16(ishl x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Slliw) x (imm12_and y 15))) + +(rule (lower (has_type $I32(ishl x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sllw) x y)) +(rule (lower (has_type $I32 (ishl x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Slliw) x y)) + +(rule (lower 
(has_type $I64 (ishl x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Slli) x y)) +(rule (lower (has_type $I64(ishl x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sll) x y)) + +(rule (lower (has_type $I128 (ishl x y))) + (lower_i128_ishl x y)) + +;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $I8 (ushr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Srlw) (ext_int_if_need $false x $I8) (alu_andi y 7)) +) +(rule (lower (has_type $I8(ushr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.SrliW) (ext_int_if_need $false x $I8) (imm12_and y 7))) + +(rule (lower (has_type $I16 (ushr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Srlw) (ext_int_if_need $false x $I16) (alu_andi y 15)) +) +(rule (lower (has_type $I16(ushr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.SrliW) (ext_int_if_need $false x $I16) (imm12_and y 15))) + +(rule (lower (has_type $I32(ushr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Srlw) x y)) +(rule (lower (has_type $I32 (ushr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.SrliW) x y)) + +(rule (lower (has_type $I64 (ushr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Srli) x y)) +(rule (lower (has_type $I64(ushr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Srl) x y)) + +(rule (lower (has_type $I128 (ushr x y))) + (lower_i128_ushr x y)) + + +;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $I8 (sshr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sra) (ext_int_if_need $true x $I8) (alu_andi y 7)) +) +(rule (lower (has_type $I8(sshr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Srai) (ext_int_if_need $true x $I8) (imm12_and y 7))) + +(rule (lower (has_type $I16 (sshr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sra) (ext_int_if_need $true x $I16) (alu_andi y 15)) +) +(rule (lower (has_type $I16(sshr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Srai) (ext_int_if_need $true x $I16) (imm12_and y 15))) + +(rule (lower (has_type $I32 (sshr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sraw) x y)) +(rule (lower (has_type $I32 (sshr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Sraiw) x y)) +(rule (lower (has_type $I64 (sshr x (valueregs_2_reg y)))) + (alu_rrr (AluOPRRR.Sra) x y)) +(rule (lower (has_type $I64(sshr x (imm12_from_value y)))) + (alu_rr_imm12 (AluOPRRI.Srai) x y)) +(rule (lower (has_type $I128 (sshr x y))) + (lower_i128_sshr x y)) + + +;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty)(rotl x (valueregs_2_reg y)))) + (lower_rotl ty (ext_int_if_need $false x ty) y)) + +(rule (lower (has_type $I128 (rotl x y))) + (lower_i128_rotl x y)) + +;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty)(rotr x (valueregs_2_reg y)))) + (lower_rotr ty (ext_int_if_need $false x ty) y)) + +(rule (lower (has_type $I128 (rotr x y))) + (lower_i128_rotr x y)) + + +;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; notice x y order!!! 
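+;; `bxor_not x y` computes `x ^ ~y`: only the second operand is inverted,
+;; so the arguments of `gen_xor_not` must not be swapped.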
+(rule (lower (has_type (fits_in_64 ty)(bxor_not x y))) + (gen_xor_not x y)) +(rule (lower (has_type $I128 (bxor_not x y))) + (let + ((low Reg (gen_xor_not (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_xor_not (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high) + ) +) + +;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty)(bor_not x y))) + (gen_orn x y)) + +(rule (lower (has_type $I128 (bor_not x y))) + (let + ((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0))) + (high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1)))) + (value_regs low high))) + + +;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type (fits_in_64 ty)(cls x))) + (lower_cls x ty)) +(rule (lower (has_type $I128 (cls x))) + (lower_cls_i128 x)) + + +;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (fabs x))) + (gen_fabs x ty)) + +;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (fneg x))) + (fpu_rrr (f_copy_neg_sign_op ty) ty x x)) + +;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type ty (fcopysign x y))) + (fpu_rrr (f_copysign_op ty) ty x y)) + +;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $F32 (fma x y z))) + (fpu_rrrr (FpuOPRRRR.FmaddS) $F64 x y z)) +(rule (lower (has_type $F64 (fma x y z))) + (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 x y z)) + + +;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $F32 (sqrt x))) + (fpu_rr (FpuOPRR.FsqrtS)$F64 x)) + +(rule (lower (has_type $F64 (sqrt x))) + (fpu_rr (FpuOPRR.FsqrtD)$F64 x)) + + +;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule + ;; + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags op addr x))) + (gen_atomic (get_atomic_rmw_op ty op) addr x (atomic_amo))) + +;;; for I8 and I16 +(rule + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags op addr x))) + (gen_atomic_rmw_loop op ty addr x)) + +;;;special for I8 and I16 max min etc. +;;;because I need uextend or sextend the value. 
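+;; There are no 8- or 16-bit AMO instructions, so subword atomics are lowered
+;; as a loop over the aligned 32-bit word containing the value (see
+;; `gen_atomic_offset` / `gen_atomic_p` below); the emitted sequence is
+;; roughly:
+;;
+;;   loop:
+;;     lr.w  t0, (p & !3)       ;; load the containing word, reserved
+;;     ...apply the 8/16-bit op at bit offset ((p & 3) * 8)...
+;;     sc.w  t1, t0, (p & !3)   ;; store conditionally
+;;     bnez  t1, loop           ;; retry if the reservation was lost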
+(rule + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $true) addr x))) + (gen_atomic_rmw_loop op ty addr (ext_int_if_need $true x ty))) + + +(rule + ;; + (lower + (has_type (valid_atomic_transaction (fits_in_16 ty)) (atomic_rmw flags (is_atomic_rmw_max_etc op $false) addr x))) + ;; + (gen_atomic_rmw_loop op ty addr (ext_int_if_need $false x ty))) + +;;;;; Rules for `AtomicRmwOp.Sub` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr x))) + (let + ((tmp WritableReg (temp_writable_reg ty)) + (x2 Reg (alu_rrr (AluOPRRR.Sub) (zero_reg) x))) + (gen_atomic (get_atomic_rmw_op ty (AtomicRmwOp.Add)) addr x2 (atomic_amo)))) + +(decl gen_atomic_rmw_loop (AtomicRmwOp Type Reg Reg) Reg) +(rule + (gen_atomic_rmw_loop op ty addr x) + (let + ((dst WritableReg (temp_writable_reg $I64)) + (t0 WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.AtomicRmwLoop (gen_atomic_offset addr ty) op dst ty (gen_atomic_p addr ty) x t0)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `AtomicRmwOp.Nand` +(rule + (lower + (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr x))) + (gen_atomic_rmw_loop (AtomicRmwOp.Nand) ty addr x)) + +(decl is_atomic_rmw_max_etc (AtomicRmwOp bool)AtomicRmwOp) +(extern extractor is_atomic_rmw_max_etc is_atomic_rmw_max_etc) + +;;;;; Rules for `atomic load`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_load flags p))) + (gen_atomic_load p ty)) + + +;;;;; Rules for `atomic store`;;;;;;;;;;;;;;;;; +(rule + (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) p)) + (gen_atomic_store p ty src)) + +(decl gen_atomic_offset (Reg Type) Reg) +(rule (gen_atomic_offset p (fits_in_16 ty)) + (alu_slli (alu_andi p 3) 3)) + +(rule (gen_atomic_offset p _) + (zero_reg)) + +(decl gen_atomic_p (Reg Type) Reg) +(rule (gen_atomic_p p (fits_in_16 ty)) + (alu_andi p -4)) + +(rule (gen_atomic_p p _) + p) + + +;;;;; Rules for `atomic cas`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type (valid_atomic_transaction ty) (atomic_cas flags p e x))) + (let + ((t0 WritableReg (temp_writable_reg ty)) + (dst WritableReg (temp_writable_reg ty)) + (_ Unit(emit (MInst.AtomicCas (gen_atomic_offset p ty) t0 dst (ext_int_if_need $false e ty) (gen_atomic_p p ty) x ty)))) + (writable_reg_to_reg dst))) + +;;;;; Rules for `copy`;;;;;;;;;;;;;;;;; +(rule (lower(has_type ty (copy x))) + (gen_move2 x ty ty)) + +;;;;; Rules for `breduce`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (breduce x))) + (gen_move2 (value_regs_get x 0) ty ty)) + +;;;;; Rules for `ireduce`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (ireduce x))) + (gen_move2 (value_regs_get x 0) ty ty)) + +;;;;; Rules for `fpromote`;;;;;;;;;;;;;;;;; +(rule + (lower (has_type ty (fpromote x))) + (fpu_rr (FpuOPRR.FcvtDS) ty x)) + +(rule + (lower (has_type ty (fdemote x))) + (fpu_rr (FpuOPRR.FcvtSD) ty x)) + + +;;;;; Rules for `for float arithmatic` +(rule + (lower (has_type ty (fadd x y))) + (fpu_rrr (f_arithmatic_op ty (Opcode.Fadd)) ty x y)) +(rule + (lower (has_type ty (fsub x y))) + (fpu_rrr (f_arithmatic_op ty (Opcode.Fsub)) ty x y)) +(rule + (lower (has_type ty (fmul x y))) + (fpu_rrr (f_arithmatic_op ty (Opcode.Fmul)) ty x y)) +(rule + (lower (has_type ty (fdiv x y))) + (fpu_rrr (f_arithmatic_op ty (Opcode.Fdiv)) ty x y)) + +(rule + (lower (has_type ty (fmin x y))) + (gen_float_select (FloatSelectOP.Min) x y ty)) + +(rule + (lower (has_type ty (fmin_pseudo x y))) + 
(gen_float_select_pseudo (FloatSelectOP.Min) x y ty)) + +(rule + (lower (has_type ty (fmax x y))) + (gen_float_select (FloatSelectOP.Max) x y ty)) + +(rule + (lower (has_type ty (fmax_pseudo x y))) + (gen_float_select_pseudo (FloatSelectOP.Max) x y ty)) + +;;;;; Rules for `stack_addr`;;;;;;;;; +(rule + (lower (stack_addr ss offset)) + (gen_stack_addr ss offset)) + +;;;;; Rules for `is_null`;;;;;;;;; +(rule + (lower (is_null v)) + (gen_reference_check (ReferenceCheckOP.IsNull) v)) + +;;;;; Rules for `is_invalid`;;;;;;;;; +(rule + (lower (is_invalid v)) + (gen_reference_check (ReferenceCheckOP.IsInvalid) v)) + +;;;;; Rules for `select`;;;;;;;;; +(rule + (lower (has_type ty (select c x y))) + (gen_select ty c x y) +) + +;;;;; Rules for `bitselect`;;;;;;;;; + +(rule + (lower (has_type ty (bitselect c x y))) + (gen_bitselect ty c x y)) + +;;;;; Rules for `bint`;;;;;;;;; +(rule + (lower (has_type (fits_in_64 ty) (bint (valueregs_2_reg x)))) + (gen_bint x)) +(rule + (lower (has_type $I128 (bint (valueregs_2_reg x)))) + (let ((tmp Reg (gen_bint x))) + (value_regs tmp (zero_reg))) +) + +;;;;; Rules for `isplit`;;;;;;;;; +(rule + (lower (isplit x)) + (let + ((t1 Reg (gen_move2 (value_regs_get x 0) $I64 $I64)) + (t2 Reg (gen_move2 (value_regs_get x 1) $I64 $I64))) + (output_pair t1 t2))) + +;;;;; Rules for `iconcat`;;;;;;;;; +(rule + (lower (has_type $I128 (iconcat x y))) + (let + ((t1 Reg (gen_move2 x $I64 $I64)) + (t2 Reg (gen_move2 y $I64 $I64))) + (value_regs t1 t2))) + +;;;;; Rules for `imax`;;;;;;;;; +(rule + (lower (has_type ty (imax x y))) + (gen_int_select ty (IntSelectOP.Imax) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) + +;;;;; Rules for `imin`;;;;;;;;; +(rule + (lower (has_type ty (imin x y))) + (gen_int_select ty(IntSelectOP.Imin) (ext_int_if_need $true x ty) (ext_int_if_need $true y ty))) +;;;;; Rules for `umax`;;;;;;;;; +(rule + (lower (has_type ty (umax x y))) + (gen_int_select ty(IntSelectOP.Umax) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +;;;;; Rules for `umin`;;;;;;;;; +(rule + (lower (has_type ty (umin x y))) + (gen_int_select ty(IntSelectOP.Umin) (ext_int_if_need $false x ty) (ext_int_if_need $false y ty))) + +;;;;; Rules for `debugtrap`;;;;;;;;; +(rule + (lower (debugtrap)) + (side_effect (SideEffectNoResult.Inst(MInst.EBreak)))) + +;;;;; Rules for `fence`;;;;;;;;; +(rule + (lower (fence)) + (side_effect (SideEffectNoResult.Inst(MInst.Fence 15 15)))) + +;;;;; Rules for `trap`;;;;;;;;; +(rule + (lower (trap code)) + (udf code)) + +;;;;; Rules for `resumable_trap`;;;;;;;;; +(rule + (lower (resumable_trap code)) + (udf code)) + +;;;;; Rules for `uload8`;;;;;;;;; +(rule + (lower (uload8 flags p offset)) + (gen_load p offset (int_load_op $false 8) flags $I64)) +;;;;; Rules for `sload8`;;;;;;;;; +(rule + (lower (sload8 flags p offset)) + (gen_load p offset (int_load_op $true 8) flags $I64)) +;;;;; Rules for `uload16`;;;;;;;;; +(rule + (lower (uload16 flags p offset)) + (gen_load p offset (int_load_op $false 16) flags $I64)) + +;;;;; Rules for `iload16`;;;;;;;;; +(rule + (lower (sload16 flags p offset)) + (gen_load p offset (int_load_op $true 16) flags $I64)) + +;;;;; Rules for `uload32`;;;;;;;;; +(rule + (lower (uload32 flags p offset)) + (gen_load p offset (int_load_op $false 32) flags $I64)) + +;;;;; Rules for `iload16`;;;;;;;;; +(rule + (lower (sload32 flags p offset)) + (gen_load p offset (int_load_op $true 32) flags $I64)) + +(rule + (lower (has_type ty (load flags p offset))) + (gen_load p offset (load_op ty) flags ty) +) +;;;; for I128 
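+;; A 128-bit value occupies a register pair, so `gen_load_128` (and
+;; `gen_store_128` further down) split the access into two 64-bit memory
+;; operations at `offset` and `offset + 8`, low half first (little-endian).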
+(rule + (lower (has_type $I128 (load flags p offset))) + (gen_load_128 p offset flags)) +;;;; for B128 +(rule + (lower (has_type $B128 (load flags p offset))) + (gen_load_128 p offset flags)) + +;;;;; Rules for `istore8`;;;;;;;;; +(rule + (lower (istore8 flags x p offset)) + (gen_store p offset (StoreOP.Sb) flags x)) +;;;;; Rules for `istore16`;;;;;;;;; +(rule + (lower (istore16 flags x p offset)) + (gen_store p offset (StoreOP.Sh) flags x)) + +;;;;; Rules for `istore32`;;;;;;;;; +(rule + (lower (istore32 flags x p offset)) + (gen_store p offset (StoreOP.Sw) flags x)) + +;;;;; Rules for `store`;;;;;;;;; +(rule + (lower (store flags x @(value_type ty) p offset)) + (gen_store p offset (store_op ty) flags x)) + +;;; special for I128 +(rule + (lower (store flags x @ (value_type $I128 ) p offset)) + (gen_store_128 p offset flags x)) + +;;; special for B128 +(rule + (lower (store flags x @ (value_type $B128 ) p offset)) + (gen_store_128 p offset flags x)) + +(decl gen_icmp(IntCC ValueRegs ValueRegs Type)Reg) +(rule + (gen_icmp cc x y ty) + (let + ((result WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.Icmp cc result x y ty)))) + result)) + +;;;;; Rules for `icmp`;;;;;;;;; +(rule + (lower (icmp cc x @ (value_type ty) y)) + (lower_icmp cc x y ty)) +;; special for `iadd_ifcout` first out. +(rule + (lower (icmp cc (iadd_ifcout a @ (value_type ty) b) y)) + (lower_icmp cc (alu_add a b) y ty)) + +(rule + (lower (icmp cc x (iadd_ifcout a @ (value_type ty) b))) + (lower_icmp cc x (alu_add a b) ty)) + +(decl gen_fcmp(FloatCC Value Value Type)Reg) +(rule + (gen_fcmp cc x y ty) + (let + ((result WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.Fcmp cc result x y ty)))) + (writable_reg_to_reg result))) + +;;;;; Rules for `fcmp`;;;;;;;;; +(rule + (lower (fcmp cc x @ (value_type ty) y)) + (gen_fcmp cc x y ty)) + +;;;;; Rules for `func_addr`;;;;;;;;; +(rule + (lower (func_addr (func_ref_data _ name _))) + (load_ext_name name 0)) + +;;;;; Rules for `fcvt_to_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint v @(value_type from)))) + (gen_fcvt_int $false v $false from to)) + +;;;;; Rules for `fcvt_to_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint v @ (value_type from)))) + (gen_fcvt_int $false v $true from to)) + +;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_sint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $true from to)) + +;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;; +(rule + (lower (has_type to (fcvt_to_uint_sat v @ (value_type from)))) + (gen_fcvt_int $true v $false from to)) + +;;;;; Rules for `fcvt_from_sint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_sint v @ (value_type from)))) + (fpu_rr (int_convert_2_float_op from $true to) to v)) + +;;;;; Rules for `fcvt_from_uint`;;;;;;;;; +(rule + (lower (has_type to (fcvt_from_uint v @ (value_type from)))) + (fpu_rr (int_convert_2_float_op from $false to) to v)) + +;;;;; Rules for `symbol_value`;;;;;;;;; +(rule + (lower (symbol_value (symbol_value_data name _ offset))) + (load_ext_name name offset) +) +;;;;; Rules for `bitcast`;;;;;;;;; +(rule + (lower (has_type out (bitcast v @ (value_type in_ty)))) + (gen_moves v in_ty out)) + +;;;;; Rules for `raw_bitcast`;;;;;;;;; +(rule + (lower (has_type out (raw_bitcast v @ (value_type in_ty)))) + (gen_moves v in_ty out)) + +;;;;; Rules for `ceil`;;;;;;;;; +(rule + (lower (has_type ty (ceil x))) + (gen_float_round (FloatRoundOP.Ceil) x ty) +) + +;;;;; Rules for `floor`;;;;;;;;; +(rule + (lower (has_type ty (floor x))) + 
(gen_float_round (FloatRoundOP.Floor) x ty)) +;;;;; Rules for `trunc`;;;;;;;;; +(rule + (lower (has_type ty (trunc x))) + (gen_float_round (FloatRoundOP.Trunc) x ty)) + +;;;;; Rules for `nearest`;;;;;;;;; +(rule + (lower (has_type ty (nearest x))) + (gen_float_round (FloatRoundOP.Nearest) x ty)) + + +;;;;; Rules for `selectif`;;;;;;;;; +(rule + (lower (has_type r_ty (selectif cc (ifcmp ca @ (value_type cty) cb) a b))) + (let + ((dst VecWritableReg (alloc_vec_writable r_ty)) + (r Reg (lower_icmp cc ca cb cty)) + (_ Unit (emit (MInst.SelectIf $false (vec_writable_clone dst) r a b)))) + (vec_writable_to_regs dst))) + +;;;;; Rules for `selectif_spectre_guard`;;;;;;;;; +(rule + (lower (has_type r_ty (selectif_spectre_guard cc (ifcmp ca @ (value_type cty) cb) a b))) + (let + ((dst VecWritableReg (alloc_vec_writable r_ty)) + (r Reg (lower_icmp cc ca cb cty)) + (_ Unit (emit (MInst.SelectIf $true (vec_writable_clone dst) r a b)))) + (vec_writable_to_regs dst))) + +;;;;; Rules for `trueif`;;;;;;;;; + +(rule + (lower (has_type ty (trueif cc (ifcmp ca @ (value_type cty) cb)))) + (lower_icmp cc ca cb cty)) + +;;;;; Rules for `trueff`;;;;;;;;; +(rule + (lower (has_type ty (trueff cc (ffcmp ca @ (value_type cty) cb)))) + (gen_fcmp cc ca cb cty)) + + +;;;;; Rules for `trapif`;;;;;;;;; +(rule + (lower (trapif cc (ifcmp a @ (value_type ty) b) trap_code)) + (let + ((test Reg (lower_icmp cc a b ty))) + (gen_trapif test trap_code))) + +(rule + (lower (trapif _ (iadd_ifcout a @ (value_type ty) b) trap_code)) + (let + ((test Reg (lower_uadd_overflow a b ty))) + (gen_trapif test trap_code))) + + +;;;;; Rules for `trapff`;;;;;;;;; +(rule + (lower (trapff cc (ffcmp a @(value_type ty) b) trap_code)) + (gen_trapff cc a b ty trap_code)) + +;;;;; Rules for `bmask`;;;;;;;;; +(rule + ;; because we encode bool all 1s. + ;; move is just ok. + (lower (has_type (fits_in_64 ty) (bmask x @ (value_type ity)))) + (gen_move2 (value_regs_get x 0) ity ty)) +;;; for i128 +(rule + ;; because we encode bool all 1s. + ;; move is just ok. + (lower (has_type $I128 (bmask x @ (value_type ity)))) + (value_regs (gen_move2 (value_regs_get x 0) $I64 $I64) (gen_move2 (value_regs_get x 0) $I64 $I64))) + +;;;;; Rules for `bextend`;;;;;;;;; +(rule + ;; because we encode bool all 1s. + ;; move is just ok. + (lower (has_type ty (bextend x @ (value_type ity)))) + ;;extra checks. + (if-let _ (valid_bextend_ty ity ty)) + (gen_moves x ity ty)) + +;;; for B128 +(rule + ;; because we encode bool all 1s. + ;; move is just ok. + (lower (has_type ty (bextend x @ (value_type ity)))) + ;;extra checks. + (if-let $B128 (valid_bextend_ty ity ty)) + (value_regs (gen_moves x $I64 $I64) (gen_moves x $I64 $I64))) + +;; N.B.: the Ret itself is generated by the ABI. 
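+;; `lower_return` only moves each returned value into its ABI-assigned
+;; location (a0/a1 or fa0/fa1, or the stack for large signatures); the
+;; epilogue and the `ret` itself are appended later by the ABI code.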
+(rule (lower (return args))
+  (lower_return (range 0 (value_slice_len args)) args))
+
+
+;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;;
+
+(rule (lower (get_frame_pointer))
+  (gen_move2 (x_reg 8) $I64 $I64))
+
+(rule (lower (get_stack_pointer))
+  (gen_move2 (x_reg 2) $I64 $I64))
+
+(rule (lower (get_return_address))
+  (load_ra))
+
+;;; Rules for `iabs` ;;;;;;;;;;;;;
+(rule
+  (lower (has_type (fits_in_64 ty) (iabs x)))
+  (lower_iabs x ty))
+
+;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (call (func_ref_data sig_ref extname dist) inputs))
+  (gen_call sig_ref extname dist inputs))
+
+(rule (lower (call_indirect sig_ref val inputs))
+  (gen_call_indirect sig_ref val inputs))
\ No newline at end of file
diff --git a/cranelift/codegen/src/isa/riscv64/lower.rs b/cranelift/codegen/src/isa/riscv64/lower.rs
new file mode 100644
index 000000000000..840071c86091
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/lower.rs
@@ -0,0 +1,62 @@
+//! Lowering rules for Riscv64.
+use super::lower_inst;
+use crate::ir::Inst as IRInst;
+use crate::isa::riscv64::inst::*;
+use crate::isa::riscv64::Riscv64Backend;
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::CodegenResult;
+pub mod isle;
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for Riscv64Backend {
+    type MInst = Inst;
+
+    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
+        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
+    }
+
+    fn lower_branch_group(
+        &self,
+        ctx: &mut Lower<Inst>,
+        branches: &[IRInst],
+        targets: &[MachLabel],
+    ) -> CodegenResult<()> {
+        // A block should end with at most two branches. The first may be a
+        // conditional branch; a conditional branch can be followed only by an
+        // unconditional branch or fallthrough. Otherwise, if only one branch,
+        // it may be an unconditional branch, a fallthrough, a return, or a
+        // trap. These conditions are verified by `is_ebb_basic()` during the
+        // verifier pass.
+        assert!(branches.len() <= 2);
+        if branches.len() == 2 {
+            let op1 = ctx.data(branches[1]).opcode();
+            assert!(op1 == Opcode::Jump);
+        }
+
+        // Lower the first branch in ISLE. This will automatically handle
+        // the second branch (if any) by emitting a two-way conditional branch.
+        if let Ok(()) = super::lower::isle::lower_branch(
+            ctx,
+            &self.triple,
+            &self.flags,
+            &self.isa_flags,
+            branches[0],
+            targets,
+        ) {
+            return Ok(());
+        }
+        unreachable!(
+            "not implemented in ISLE: branch = `{}`",
+            ctx.dfg().display_inst(branches[0]),
+        );
+    }
+
+    fn maybe_pinned_reg(&self) -> Option<Reg> {
+        // The pinned register is a fixed register that code can use directly
+        // without going through the register allocator; the riscv64 backend
+        // does not support this feature yet.
+        None
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
new file mode 100644
index 000000000000..056b499c6913
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -0,0 +1,544 @@
+//! ISLE integration glue code for riscv64 lowering.
+
+// Pull in the ISLE generated code.
+#[allow(unused)]
+pub mod generated_code;
+use generated_code::{Context, MInst};
+
+use target_lexicon::Triple;
+
+// Types that the generated ISLE code uses via `use super::*`.
+use super::{writable_zero_reg, zero_reg};
+use std::vec::Vec;
+
+use crate::isa::riscv64::settings::Flags as IsaFlags;
+use crate::machinst::{isle::*, MachInst, SmallInstVec};
+use crate::settings::Flags;
+
+use crate::machinst::{VCodeConstant, VCodeConstantData};
+use crate::{
+    ir::{
+        immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
+        StackSlot, TrapCode, Value, ValueList,
+    },
+    isa::riscv64::inst::*,
+    machinst::{ArgPair, InsnOutput, Lower},
+};
+use regalloc2::PReg;
+
+use crate::isa::riscv64::abi::Riscv64ABICaller;
+use std::boxed::Box;
+use std::convert::TryFrom;
+
+use crate::machinst::Reg;
+
+type BoxCallInfo = Box<CallInfo>;
+type BoxCallIndInfo = Box<CallIndInfo>;
+type BoxExternalName = Box<ExternalName>;
+type VecMachLabel = Vec<MachLabel>;
+type VecArgPair = Vec<ArgPair>;
+use crate::machinst::valueregs;
+
+/// The main entry point for lowering with ISLE.
+pub(crate) fn lower(
+    lower_ctx: &mut Lower<MInst>,
+    flags: &Flags,
+    triple: &Triple,
+    isa_flags: &IsaFlags,
+    outputs: &[InsnOutput],
+    inst: Inst,
+) -> Result<(), ()> {
+    lower_common(
+        lower_ctx,
+        triple,
+        flags,
+        isa_flags,
+        outputs,
+        inst,
+        |cx, insn| generated_code::constructor_lower(cx, insn),
+    )
+}
+
+impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+    isle_prelude_method_helpers!(Riscv64ABICaller);
+}
+
+impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+    isle_prelude_methods!();
+    isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
+
+    fn vec_writable_to_regs(&mut self, val: &VecWritableReg) -> ValueRegs {
+        match val.len() {
+            1 => ValueRegs::one(val[0].to_reg()),
+            2 => ValueRegs::two(val[0].to_reg(), val[1].to_reg()),
+            _ => unreachable!(),
+        }
+    }
+
+    fn valid_bextend_ty(&mut self, from: Type, to: Type) -> Option<Type> {
+        if from.is_bool() && to.is_bool() && from.bits() < to.bits() {
+            Some(to)
+        } else {
+            None
+        }
+    }
+    fn lower_br_fcmp(
+        &mut self,
+        cc: &FloatCC,
+        a: Reg,
+        b: Reg,
+        targets: &VecMachLabel,
+        ty: Type,
+    ) -> InstOutput {
+        let tmp = self.temp_writable_reg(I64);
+        MInst::lower_br_fcmp(
+            *cc,
+            a,
+            b,
+            BranchTarget::Label(targets[0]),
+            BranchTarget::Label(targets[1]),
+            ty,
+            tmp,
+        )
+        .iter()
+        .for_each(|i| self.emit(i));
+        InstOutput::default()
+    }
+
+    fn lower_brz_or_nz(
+        &mut self,
+        cc: &IntCC,
+        a: ValueRegs,
+        targets: &VecMachLabel,
+        ty: Type,
+    ) -> InstOutput {
+        MInst::lower_br_icmp(
+            *cc,
+            a,
+            self.int_zero_reg(ty),
+            BranchTarget::Label(targets[0]),
+            BranchTarget::Label(targets[1]),
+            ty,
+        )
+        .iter()
+        .for_each(|i| self.emit(i));
+        InstOutput::default()
+    }
+    fn lower_br_icmp(
+        &mut self,
+        cc: &IntCC,
+        a: ValueRegs,
+        b: ValueRegs,
+        targets: &VecMachLabel,
+        ty: Type,
+    ) -> InstOutput {
+        let test = generated_code::constructor_lower_icmp(self, cc, a, b, ty).unwrap();
+        self.emit(&MInst::CondBr {
+            taken: BranchTarget::Label(targets[0]),
+            not_taken: BranchTarget::Label(targets[1]),
+            kind: IntegerCompare {
+                kind: IntCC::NotEqual,
+                rs1: test,
+                rs2: zero_reg(),
+            },
+        });
+        InstOutput::default()
+    }
+    fn load_ra(&mut self) -> Reg {
+        if self.flags.preserve_frame_pointers() {
+            let tmp = self.temp_writable_reg(I64);
+            self.emit(&MInst::Load {
+                rd: tmp,
+                op: LoadOP::Ld,
+                flags: MemFlags::trusted(),
+                from: AMode::FPOffset(8, I64),
+            });
+            tmp.to_reg()
+        } else {
+            self.gen_move2(link_reg(), I64, I64)
+        }
+    }
+    fn int_zero_reg(&mut self, ty: Type) -> ValueRegs {
+        assert!(ty.is_int() || ty.is_bool(), "{:?}", ty);
+        if ty.bits() == 128 {
+            ValueRegs::two(self.zero_reg(), self.zero_reg())
+        } else {
+            // Everything narrower fits in a single register.
+            ValueRegs::one(self.zero_reg())
+        }
+    }
+
+    fn vec_label_get(&mut self, val: &VecMachLabel, x: u8) -> MachLabel {
+        val[x as usize]
+    }
+
+    fn label_to_br_target(&mut self, label: MachLabel) -> BranchTarget {
+        BranchTarget::Label(label)
+    }
+
+    fn vec_writable_clone(&mut self, v: &VecWritableReg) -> VecWritableReg {
+        v.clone()
+    }
+
+    fn gen_moves(&mut self, rs: ValueRegs, in_ty: Type, out_ty: Type) -> ValueRegs {
+        let tmp = construct_dest(|ty| self.temp_writable_reg(ty), out_ty);
+        if in_ty.bits() < 64 {
+            self.emit(&gen_move(tmp.regs()[0], out_ty, rs.regs()[0], in_ty));
+        } else {
+            gen_moves(tmp.regs(), rs.regs())
+                .iter()
+                .for_each(|i| self.emit(i));
+        }
+        tmp.map(|r| r.to_reg())
+    }
+    fn imm12_and(&mut self, imm: Imm12, x: i32) -> Imm12 {
+        Imm12::from_bits(imm.as_i16() & (x as i16))
+    }
+    fn alloc_vec_writable(&mut self, ty: Type) -> VecWritableReg {
+        if ty.is_int() || ty.is_bool() || ty == R32 || ty == R64 {
+            if ty.bits() <= 64 {
+                vec![self.temp_writable_reg(I64)]
+            } else {
+                vec![self.temp_writable_reg(I64), self.temp_writable_reg(I64)]
+            }
+        } else if ty.is_float() {
+            vec![self.temp_writable_reg(ty)]
+        } else {
+            unimplemented!("ty:{:?}", ty)
+        }
+    }
+
+    fn imm(&mut self, ty: Type, mut val: u64) -> Reg {
+        // Wider boolean types (b8..b64) are represented as either all-zero
+        // or all-one bits, per the CLIF documentation, so a "true" of any
+        // boolean type is materialized as all ones.
+        if ty.is_bool() && val != 0 {
+            val = !0;
+        }
+        let tmp = self.temp_writable_reg(ty);
+        self.emit_list(&MInst::load_constant_u64(tmp, val));
+        tmp.to_reg()
+    }
+    #[inline]
+    fn emit(&mut self, arg0: &MInst) -> Unit {
+        self.lower_ctx.emit(arg0.clone());
+    }
+    #[inline]
+    fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12> {
+        Imm12::maybe_from_u64(arg0)
+    }
+    #[inline]
+    fn writable_zero_reg(&mut self) -> WritableReg {
+        writable_zero_reg()
+    }
+    #[inline]
+    fn neg_imm12(&mut self, arg0: Imm12) -> Imm12 {
+        -arg0
+    }
+    #[inline]
+    fn zero_reg(&mut self) -> Reg {
+        zero_reg()
+    }
+    #[inline]
+    fn imm_from_bits(&mut self, val: u64) -> Imm12 {
+        Imm12::maybe_from_u64(val).unwrap()
+    }
+    #[inline]
+    fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 {
+        Imm12::maybe_from_u64(val as u64).unwrap()
+    }
+
+    fn gen_default_frm(&mut self) -> OptionFloatRoundingMode {
+        None
+    }
+    fn gen_select_reg(&mut self, cc: &IntCC, a: Reg, b: Reg, rs1: Reg, rs2: Reg) -> Reg {
+        let rd = self.temp_writable_reg(MInst::canonical_type_for_rc(rs1.class()));
+        self.emit(&MInst::SelectReg {
+            rd,
+            rs1,
+            rs2,
+            condition: IntegerCompare {
+                kind: *cc,
+                rs1: a,
+                rs2: b,
+            },
+        });
+        rd.to_reg()
+    }
+    fn load_u64_constant(&mut self, val: u64) -> Reg {
+        let rd = self.temp_writable_reg(I64);
+        MInst::load_constant_u64(rd, val)
+            .iter()
+            .for_each(|i| self.emit(i));
+        rd.to_reg()
+    }
+    fn u8_as_i32(&mut self, x: u8) -> i32 {
+        x as i32
+    }
+
+    fn ext_sign_bit(&mut self, ty: Type, r: Reg) -> Reg {
+        assert!(ty.is_int());
+        let rd = self.temp_writable_reg(I64);
+        self.emit(&MInst::AluRRImm12 {
+            alu_op: AluOPRRI::Bexti,
+            rd,
+            rs: r,
+            imm12: Imm12::from_bits((ty.bits() - 1) as i16),
+        });
+        rd.to_reg()
+    }
+    fn imm12_const(&mut self, val: i32) -> Imm12 {
+        Imm12::maybe_from_u64(val as u64).unwrap()
+    }
+    fn imm12_const_add(&mut self, val: i32, add: i32) -> Imm12 {
+        Imm12::maybe_from_u64((val + add) as u64).unwrap()
+    }
+
+    /// Computes the shift amount masked to the type width, together with
+    /// `ty.bits() - shamt` (used by the rotate and 128-bit shift lowerings).
+    fn gen_shamt(&mut self, ty: Type, shamt: Reg) -> ValueRegs {
+        let shamt = {
+            let tmp = self.temp_writable_reg(I64);
+            self.emit(&MInst::AluRRImm12 {
+                alu_op: AluOPRRI::Andi,
+                rd: tmp,
+                rs: shamt,
+                imm12: Imm12::from_bits((ty.bits() - 1) as i16),
+            });
+            tmp.to_reg()
+        };
+        let len_sub_shamt = {
+            let len_sub_shamt = self.temp_writable_reg(I64);
+            self.emit(&MInst::load_imm12(
+                len_sub_shamt,
+                Imm12::from_bits(ty.bits() as i16),
+            ));
+            self.emit(&MInst::AluRRR {
+                alu_op: AluOPRRR::Sub,
+                rd: len_sub_shamt,
+                rs1: len_sub_shamt.to_reg(),
+                rs2: shamt,
+            });
+            len_sub_shamt.to_reg()
+        };
+        ValueRegs::two(shamt, len_sub_shamt)
+    }
+
+    fn has_b(&mut self) -> Option<bool> {
+        Some(self.isa_flags.has_b())
+    }
+    fn has_zbkb(&mut self) -> Option<bool> {
+        Some(self.isa_flags.has_zbkb())
+    }
+
+    fn valueregs_2_reg(&mut self, val: Value) -> Reg {
+        self.put_in_regs(val).regs()[0]
+    }
+
+    fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
+        x[index as usize]
+    }
+
+    fn move_f_to_x(&mut self, r: Reg, ty: Type) -> Reg {
+        let result = self.temp_writable_reg(I64);
+        self.emit(&gen_move(result, I64, r, ty));
+        result.to_reg()
+    }
+    fn offset32_imm(&mut self, offset: i32) -> Offset32 {
+        Offset32::new(offset)
+    }
+    fn default_memflags(&mut self) -> MemFlags {
+        MemFlags::new()
+    }
+    fn move_x_to_f(&mut self, r: Reg, ty: Type) -> Reg {
+        let result = self.temp_writable_reg(ty);
+        self.emit(&gen_move(result, ty, r, I64));
+        result.to_reg()
+    }
+
+    fn pack_float_rounding_mode(&mut self, f: &FRM) -> OptionFloatRoundingMode {
+        Some(*f)
+    }
+
+    fn int_convert_2_float_op(&mut self, from: Type, is_signed: bool, to: Type) -> FpuOPRR {
+        FpuOPRR::int_convert_2_float_op(from, is_signed, to)
+    }
+    fn gen_amode(&mut self, base: Reg, offset: Offset32, ty: Type) -> AMode {
+        AMode::RegOffset(base, i64::from(offset), ty)
+    }
+    fn valid_atomic_transaction(&mut self, ty: Type) -> Option<Type> {
+        if ty.is_int() && ty.bits() <= 64 {
+            Some(ty)
+        } else {
+            None
+        }
+    }
+    fn is_atomic_rmw_max_etc(&mut self, op: &AtomicRmwOp) -> Option<(AtomicRmwOp, bool)> {
+        let op = *op;
+        match op {
+            crate::ir::AtomicRmwOp::Umin => Some((op, false)),
+            crate::ir::AtomicRmwOp::Umax => Some((op, false)),
+            crate::ir::AtomicRmwOp::Smin => Some((op, true)),
+            crate::ir::AtomicRmwOp::Smax => Some((op, true)),
+            _ => None,
+        }
+    }
+    fn load_op(&mut self, ty: Type) -> LoadOP {
+        LoadOP::from_type(ty)
+    }
+    fn store_op(&mut self, ty: Type) -> StoreOP {
+        StoreOP::from_type(ty)
+    }
+    fn load_ext_name(&mut self, name: ExternalName, offset: i64) -> Reg {
+        let tmp = self.temp_writable_reg(I64);
+        self.emit(&MInst::LoadExtName {
+            rd: tmp,
+            name: Box::new(name),
+            offset,
+        });
+        tmp.to_reg()
+    }
+
+    fn offset32_add(&mut self, a: Offset32, addend: i64) -> Offset32 {
+        a.try_add_i64(addend).expect("offset exceeds range")
+    }
+
+    fn gen_stack_addr(&mut self, slot: StackSlot, offset: Offset32) -> Reg {
+        let result = self.temp_writable_reg(I64);
+        let i = self
+            .lower_ctx
+            .abi()
+            .sized_stackslot_addr(slot, i64::from(offset) as u32, result);
+        self.emit(&i);
+        result.to_reg()
+    }
+    fn atomic_amo(&mut self) -> AMO {
+        AMO::SeqCst
+    }
+
+    fn gen_move2(&mut self, r: Reg, ity: Type, oty: Type) -> Reg {
+        let tmp = self.temp_writable_reg(oty);
+        self.emit(&gen_move(tmp, oty, r, ity));
+        tmp.to_reg()
+    }
+
+    fn intcc_is_gt_etc(&mut self, cc: &IntCC) -> Option<(IntCC, bool)> {
+        let cc = *cc;
+        match cc {
+            IntCC::SignedLessThan => Some((cc, true)),
+            IntCC::SignedGreaterThanOrEqual => Some((cc, true)),
+            IntCC::SignedGreaterThan => Some((cc, true)),
+            IntCC::SignedLessThanOrEqual => Some((cc, true)),
+            IntCC::UnsignedLessThan => Some((cc, false)),
+            IntCC::UnsignedGreaterThanOrEqual => Some((cc, false)),
+            IntCC::UnsignedGreaterThan => Some((cc, false)),
+            IntCC::UnsignedLessThanOrEqual => Some((cc, false)),
+            _ => None,
+        }
+    }
+    fn intcc_is_eq_or_ne(&mut self, cc: &IntCC) -> Option<IntCC> {
+        let cc = *cc;
+        if cc == IntCC::Equal || cc == IntCC::NotEqual {
+            Some(cc)
+        } else {
+            None
+        }
+    }
+    fn lower_br_table(&mut self, index: Reg, targets: &VecMachLabel) -> InstOutput {
+        let tmp = self.temp_writable_reg(I64);
+        let default_ = BranchTarget::Label(targets[0]);
+        let targets: Vec<BranchTarget> = targets
+            .iter()
+            .skip(1)
+            .map(|bix| BranchTarget::Label(*bix))
+            .collect();
+        self.emit(&MInst::BrTableCheck {
+            index,
+            targets_len: targets.len() as i32,
+            default_,
+        });
+        self.emit(&MInst::BrTable {
+            index,
+            tmp1: tmp,
+            targets,
+        });
+        InstOutput::default()
+    }
+    fn x_reg(&mut self, x: u8) -> Reg {
+        x_reg(x as usize)
+    }
+    fn shift_int_to_most_significant(&mut self, v: Reg, ty: Type) -> Reg {
+        assert!(ty.is_int() && ty.bits() <= 64);
+        if ty == I64 {
+            return v;
+        }
+        let tmp = self.temp_writable_reg(I64);
+        self.emit(&MInst::AluRRImm12 {
+            alu_op: AluOPRRI::Slli,
+            rd: tmp,
+            rs: v,
+            imm12: Imm12::from_bits((64 - ty.bits()) as i16),
+        });
+
+        tmp.to_reg()
+    }
+}
+
+impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+    #[inline]
+    fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
+        for i in list {
+            self.lower_ctx.emit(i.clone());
+        }
+    }
+}
+
+/// The main entry point for branch lowering with ISLE.
+pub(crate) fn lower_branch(
+    lower_ctx: &mut Lower<MInst>,
+    triple: &Triple,
+    flags: &Flags,
+    isa_flags: &IsaFlags,
+    branch: Inst,
+    targets: &[MachLabel],
+) -> Result<(), ()> {
+    lower_common(
+        lower_ctx,
+        triple,
+        flags,
+        isa_flags,
+        &[],
+        branch,
+        |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
+    )
+}
+
+/// Construct the destination register(s) according to `ty`.
+fn construct_dest<F: std::ops::FnMut(Type) -> WritableReg>(
+    mut alloc: F,
+    ty: Type,
+) -> WritableValueRegs {
+    if ty.is_bool() || ty.is_int() {
+        if ty.bits() == 128 {
+            WritableValueRegs::two(alloc(I64), alloc(I64))
+        } else {
+            WritableValueRegs::one(alloc(I64))
+        }
+    } else if ty.is_float() {
+        WritableValueRegs::one(alloc(F64))
+    } else {
+        unimplemented!("vector types are not implemented");
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/riscv64/lower/isle/generated_code.rs
new file mode 100644
index 000000000000..c595c311d05a
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle/generated_code.rs
@@ -0,0 +1,9 @@
+// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of
+// the generated ISLE source below because we include!() it. We must include!() it because its path
+// depends on an environment variable; and also because of this, we can't do the `#[path = "..."]
+// mod generated_code;` trick either.
+#![allow(dead_code, unreachable_code, unreachable_patterns)]
+#![allow(unused_imports, unused_variables, non_snake_case, unused_mut)]
+#![allow(irrefutable_let_patterns)]
+
+include!(concat!(env!("ISLE_DIR"), "/isle_riscv64.rs"));
diff --git a/cranelift/codegen/src/isa/riscv64/lower_inst.rs b/cranelift/codegen/src/isa/riscv64/lower_inst.rs
new file mode 100644
index 000000000000..db0f4e5a57a7
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/lower_inst.rs
@@ -0,0 +1,36 @@
+//! Lower a single Cranelift instruction into vcode.
+
+use crate::ir::Inst as IRInst;
+
+use crate::isa::riscv64::settings as riscv64_settings;
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::settings::Flags;
+use crate::CodegenResult;
+
+use crate::isa::riscv64::inst::*;
+use target_lexicon::Triple;
+
+/// Actually codegen an instruction's results into registers.
+pub(crate) fn lower_insn_to_regs(
+    ctx: &mut Lower<Inst>,
+    insn: IRInst,
+    triple: &Triple,
+    flags: &Flags,
+    isa_flags: &riscv64_settings::Flags,
+) -> CodegenResult<()> {
+    let outputs = insn_outputs(ctx, insn);
+    let ty = if outputs.len() > 0 {
+        Some(ctx.output_ty(insn, 0))
+    } else {
+        None
+    };
+    if let Ok(()) = super::lower::isle::lower(ctx, flags, triple, isa_flags, &outputs, insn) {
+        return Ok(());
+    }
+    unreachable!(
+        "not implemented in ISLE: inst = `{}`, type = `{:?}`",
+        ctx.dfg().display_inst(insn),
+        ty
+    );
+}
diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs
new file mode 100644
index 000000000000..3632ed1d8c20
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv64/mod.rs
@@ -0,0 +1,246 @@
+//! RISC-V 64-bit Instruction Set Architecture.
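+//!
+//! The backend follows the same structure as the other Cranelift ISA
+//! backends: `abi.rs` implements the RV64 calling conventions, `inst/`
+//! defines the machine instructions, their encodings (`inst/emit.rs`) and
+//! registers (`inst/regs.rs`), `lower.isle` contains the CLIF-to-MachInst
+//! lowering rules, and this module wires everything into the `TargetIsa`
+//! trait.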
+ +use crate::ir; +use crate::ir::condcodes::IntCC; +use crate::ir::Function; + +use crate::isa::riscv64::settings as riscv_settings; +use crate::isa::{Builder as IsaBuilder, TargetIsa}; +use crate::machinst::{ + compile, CompiledCode, CompiledCodeStencil, MachTextSectionBuilder, Reg, SigSet, + TextSectionBuilder, VCode, +}; +use crate::result::CodegenResult; +use crate::settings as shared_settings; +use alloc::{boxed::Box, vec::Vec}; +use core::fmt; +use regalloc2::MachineEnv; +use target_lexicon::{Architecture, Triple}; +mod abi; +pub(crate) mod inst; +mod lower; +mod lower_inst; +mod settings; +#[cfg(feature = "unwind")] +use crate::isa::unwind::systemv; + +use inst::crate_reg_eviroment; + +use self::inst::EmitInfo; + +/// A riscv64 backend. +pub struct Riscv64Backend { + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + mach_env: MachineEnv, +} + +impl Riscv64Backend { + /// Create a new riscv64 backend with the given (shared) flags. + pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: riscv_settings::Flags, + ) -> Riscv64Backend { + let mach_env = crate_reg_eviroment(&flags); + Riscv64Backend { + triple, + flags, + isa_flags, + mach_env, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. + fn compile_vcode( + &self, + func: &Function, + flags: shared_settings::Flags, + ) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> { + let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); + let sigs = SigSet::new::<abi::Riscv64MachineDeps>(func, &self.flags)?; + let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; + compile::compile::<Riscv64Backend>(func, self, abi, &self.mach_env, emit_info, sigs) + } +} + +impl TargetIsa for Riscv64Backend { + fn compile_function( + &self, + func: &Function, + want_disasm: bool, + ) -> CodegenResult<CompiledCodeStencil> { + let flags = self.flags(); + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; + + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; + + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } + + Ok(CompiledCodeStencil { + buffer, + frame_size, + disasm: emit_result.disasm, + value_labels_ranges, + sized_stackslot_offsets, + dynamic_stackslot_offsets, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, + alignment: emit_result.alignment, + }) + } + + fn name(&self) -> &'static str { + "riscv64" + } + fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { + 16 + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &shared_settings::Flags { + &self.flags + } + + fn isa_flags(&self) -> Vec<shared_settings::Value> { + self.isa_flags.iter().collect() + } + + fn unsigned_add_overflow_condition(&self) -> IntCC { + IntCC::UnsignedGreaterThanOrEqual + } + + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + result: &CompiledCode, + kind: crate::machinst::UnwindInfoKind, + ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> { + use crate::isa::unwind::UnwindInfo; + use crate::machinst::UnwindInfoKind; + Ok(match kind { +
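+ // Only SystemV-style unwind info is implemented for riscv64; the Windows arm, like any other kind, returns None.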
UnwindInfoKind::SystemV => { + let mapper = self::inst::unwind::systemv::RegisterMapper; + Some(UnwindInfo::SystemV( + crate::isa::unwind::systemv::create_unwind_info_from_insts( + &result.buffer.unwind_info[..], + result.buffer.data().len(), + &mapper, + )?, + )) + } + UnwindInfoKind::Windows => None, + _ => None, + }) + } + + #[cfg(feature = "unwind")] + fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> { + Some(inst::unwind::systemv::create_cie()) + } + + fn text_section_builder(&self, num_funcs: u32) -> Box<dyn TextSectionBuilder> { + Box::new(MachTextSectionBuilder::<inst::Inst>::new(num_funcs)) + } + + #[cfg(feature = "unwind")] + fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result<u16, systemv::RegisterMappingError> { + inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) + } + + fn function_alignment(&self) -> u32 { + 4 + } +} + +impl fmt::Display for Riscv64Backend { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("MachBackend") + .field("name", &self.name()) + .field("triple", &self.triple()) + .field("flags", &format!("{}", self.flags())) + .finish() + } +} + +/// Create a new `isa::Builder`. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + match triple.architecture { + Architecture::Riscv64(..) => {} + _ => unreachable!(), + } + IsaBuilder { + triple, + setup: riscv_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = riscv_settings::Flags::new(&shared_flags, builder); + let backend = Riscv64Backend::new_with_flags(triple, shared_flags, isa_flags); + Ok(Box::new(backend)) + }, + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{AbiParam, Function, InstBuilder, Signature, UserFuncName}; + use crate::isa::CallConv; + use crate::settings; + use crate::settings::Configurable; + use core::str::FromStr; + use target_lexicon::Triple; + + #[test] + fn test_compile_function() { + let name = UserFuncName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + sig.returns.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + + let bb0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(bb0, I32); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(bb0); + let v0 = pos.ins().iconst(I32, 0x1234); + let v1 = pos.ins().iadd(arg0, v0); + pos.ins().return_(&[v1]); + + let mut shared_flags_builder = settings::builder(); + shared_flags_builder.set("opt_level", "none").unwrap(); + let shared_flags = settings::Flags::new(shared_flags_builder); + let isa_flags = riscv_settings::Flags::new(&shared_flags, riscv_settings::builder()); + let backend = Riscv64Backend::new_with_flags( + Triple::from_str("riscv64").unwrap(), + shared_flags, + isa_flags, + ); + let buffer = backend.compile_function(&mut func, true).unwrap(); + let code = buffer.buffer.data(); + // 0: 000015b7 lui a1,0x1 + // 4: 23458593 addi a1,a1,564 # 0x1234 + // 8: 00b5053b addw a0,a0,a1 + // c: 00008067 ret + let golden = vec![ + 0xb7, 0x15, 0x0, 0x0, 0x93, 0x85, 0x45, 0x23, 0x3b, 0x5, 0xb5, 0x0, 0x67, 0x80, 0x0, + 0x0, + ]; + assert_eq!(code, &golden[..]); + } +} diff --git a/cranelift/codegen/src/isa/riscv64/settings.rs b/cranelift/codegen/src/isa/riscv64/settings.rs new file mode 100644 index 000000000000..993062a9b831 --- /dev/null +++ b/cranelift/codegen/src/isa/riscv64/settings.rs @@ -0,0 +1,8 @@ +//! riscv64 Settings.
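+//! +//! These ISA-specific flags are consulted alongside the shared `settings::Flags`; `Flags::new(&shared_flags, builder)` constructs them from a riscv64 settings `Builder`.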
+ +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in `cranelift-codegen/meta/src/isa/riscv64.rs`. +include!(concat!(env!("OUT_DIR"), "/settings-riscv64.rs")); diff --git a/cranelift/filetests/filetests/isa/riscv64/amodes.clif b/cranelift/filetests/filetests/isa/riscv64/amodes.clif new file mode 100644 index 000000000000..3ec876a999fa --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/amodes.clif @@ -0,0 +1,365 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f5(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v0, v2 + v4 = load.i32 v3 + return v4 +} + +; block0: +; sext.w a3,a1 +; add a3,a0,a3 +; lw a0,0(a3) +; ret + +function %f6(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v2, v0 + v4 = load.i32 v3 + return v4 +} + +; block0: +; sext.w a3,a1 +; add a3,a3,a0 +; lw a0,0(a3) +; ret + +function %f7(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = uextend.i64 v0 + v3 = uextend.i64 v1 + v4 = iadd.i64 v2, v3 + v5 = load.i32 v4 + return v5 +} + +; block0: +; uext.w a4,a0 +; uext.w a5,a1 +; add a4,a4,a5 +; lw a0,0(a4) +; ret + +function %f8(i64, i32) -> i32 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iconst.i64 32 + v4 = iadd.i64 v2, v3 + v5 = iadd.i64 v4, v0 + v6 = iadd.i64 v5, v5 + v7 = load.i32 v6+4 + return v7 +} + +; block0: +; sext.w a5,a1 +; addi a5,a5,32 +; add a5,a5,a0 +; add a5,a5,a5 +; lw a0,4(a5) +; ret + +function %f9(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i64 48 + v4 = iadd.i64 v0, v1 + v5 = iadd.i64 v4, v2 + v6 = iadd.i64 v5, v3 + v7 = load.i32 v6 + return v7 +} + +; block0: +; add a5,a0,a1 +; add a5,a5,a2 +; addi a5,a5,48 +; lw a0,0(a5) +; ret + +function %f10(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i64 4100 + v4 = iadd.i64 v0, v1 + v5 = iadd.i64 v4, v2 + v6 = iadd.i64 v5, v3 + v7 = load.i32 v6 + return v7 +} + +; block0: +; add a6,a0,a1 +; add a6,a6,a2 +; lui a5,1 +; addi a5,a5,4 +; add t3,a6,a5 +; lw a0,0(t3) +; ret + +function %f10() -> i32 { +block0: + v1 = iconst.i64 1234 + v2 = load.i32 v1 + return v2 +} + +; block0: +; li t1,1234 +; lw a0,0(t1) +; ret + +function %f11(i64) -> i32 { +block0(v0: i64): + v1 = iconst.i64 8388608 ;; Imm12: 0x800 << 12 + v2 = iadd.i64 v0, v1 + v3 = load.i32 v2 + return v3 +} + +; block0: +; lui a1,2048 +; add a3,a0,a1 +; lw a0,0(a3) +; ret + +function %f12(i64) -> i32 { +block0(v0: i64): + v1 = iconst.i64 -4 + v2 = iadd.i64 v0, v1 + v3 = load.i32 v2 + return v3 +} + +; block0: +; addi a1,a0,-4 +; lw a0,0(a1) +; ret + +function %f13(i64) -> i32 { +block0(v0: i64): + v1 = iconst.i64 1000000000 + v2 = iadd.i64 v0, v1 + v3 = load.i32 v2 + return v3 +} + +; block0: +; lui a1,244141 +; addi a1,a1,2560 +; add a4,a0,a1 +; lw a0,0(a4) +; ret + +function %f14(i32) -> i32 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = load.i32 v1 + return v2 +} + +; block0: +; sext.w a1,a0 +; lw a0,0(a1) +; ret + +function %f15(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sextend.i64 v0 + v3 = sextend.i64 v1 + v4 = iadd.i64 v2, v3 + v5 = load.i32 v4 + return v5 +} + +; block0: +; sext.w a4,a0 +; sext.w a5,a1 +; add a4,a4,a5 +; lw a0,0(a4) +; ret + +function %f18(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i32 -4098 + v6 = uextend.i64 v3 + v5 = sload16.i32 v6+0 + return v5 +} + +; 
block0: +; lui a3,1048575 +; addi a3,a3,4094 +; uext.w a6,a3 +; lh a0,0(a6) +; ret + +function %f19(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i32 4098 + v6 = uextend.i64 v3 + v5 = sload16.i32 v6+0 + return v5 +} + +; block0: +; lui a3,1 +; addi a3,a3,2 +; uext.w a6,a3 +; lh a0,0(a6) +; ret + +function %f20(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i32 -4098 + v6 = sextend.i64 v3 + v5 = sload16.i32 v6+0 + return v5 +} + +; block0: +; lui a3,1048575 +; addi a3,a3,4094 +; sext.w a6,a3 +; lh a0,0(a6) +; ret + +function %f21(i64, i64, i64) -> i32 { +block0(v0: i64, v1: i64, v2: i64): + v3 = iconst.i32 4098 + v6 = sextend.i64 v3 + v5 = sload16.i32 v6+0 + return v5 +} + +; block0: +; lui a3,1 +; addi a3,a3,2 +; sext.w a6,a3 +; lh a0,0(a6) +; ret + +function %i128(i64) -> i128 { +block0(v0: i64): + v1 = load.i128 v0 + store.i128 v1, v0 + return v1 +} + +; block0: +; ld a1,0(a0) +; mv a3,a1 +; ld a1,8(a0) +; mv a5,a3 +; sd a5,0(a0) +; sd a1,8(a0) +; mv a0,a3 +; ret + +function %i128_imm_offset(i64) -> i128 { +block0(v0: i64): + v1 = load.i128 v0+16 + store.i128 v1, v0+16 + return v1 +} + +; block0: +; ld a1,16(a0) +; mv a3,a1 +; ld a1,24(a0) +; mv a5,a3 +; sd a5,16(a0) +; sd a1,24(a0) +; mv a0,a3 +; ret + +function %i128_imm_offset_large(i64) -> i128 { +block0(v0: i64): + v1 = load.i128 v0+504 + store.i128 v1, v0+504 + return v1 +} + +; block0: +; ld a1,504(a0) +; mv a3,a1 +; ld a1,512(a0) +; mv a5,a3 +; sd a5,504(a0) +; sd a1,512(a0) +; mv a0,a3 +; ret + +function %i128_imm_offset_negative_large(i64) -> i128 { +block0(v0: i64): + v1 = load.i128 v0-512 + store.i128 v1, v0-512 + return v1 +} + +; block0: +; ld a1,-512(a0) +; mv a3,a1 +; ld a1,-504(a0) +; mv a5,a3 +; sd a5,-512(a0) +; sd a1,-504(a0) +; mv a0,a3 +; ret + +function %i128_add_offset(i64) -> i128 { +block0(v0: i64): + v1 = iadd_imm v0, 32 + v2 = load.i128 v1 + store.i128 v2, v1 + return v2 +} + +; block0: +; addi a3,a0,32 +; ld a0,0(a3) +; ld a1,8(a3) +; sd a0,0(a3) +; sd a1,8(a3) +; ret + +function %i128_32bit_sextend_simple(i32) -> i128 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = load.i128 v1 + store.i128 v2, v1 + return v2 +} + +; block0: +; sext.w a3,a0 +; ld a0,0(a3) +; ld a1,8(a3) +; sd a0,0(a3) +; sd a1,8(a3) +; ret + +function %i128_32bit_sextend(i64, i32) -> i128 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v0, v2 + v4 = iadd_imm.i64 v3, 24 + v5 = load.i128 v4 + store.i128 v5, v4 + return v5 +} + +; block0: +; sext.w a6,a1 +; add a6,a0,a6 +; addi a6,a6,24 +; ld a0,0(a6) +; ld a1,8(a6) +; sd a0,0(a6) +; sd a1,8(a6) +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif b/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif new file mode 100644 index 000000000000..a6507fea5c01 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/arithmetic.clif @@ -0,0 +1,509 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f1(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd.i64 v0, v1 + return v2 +} + +; block0: +; add a0,a0,a1 +; ret + +function %f2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = isub.i64 v0, v1 + return v2 +} + +; block0: +; sub a0,a0,a1 +; ret + +function %f3(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = imul.i64 v0, v1 + return v2 +} + +; block0: +; mul a0,a0,a1 +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = umulhi.i64 v0, v1 + return v2 +} + +; block0: +; mulhu a0,a0,a1 +; ret + +function %f5(i64, i64) -> i64 { 
+block0(v0: i64, v1: i64): + v2 = smulhi.i64 v0, v1 + return v2 +} + +; block0: +; mulh a0,a0,a1 +; ret + +function %f6(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv.i64 v0, v1 + return v2 +} + +; block0: +; li a2,-1 +; li a3,1 +; slli a5,a3,63 +; eq a7,a2,a1##ty=i64 +; eq t4,a5,a0##ty=i64 +; and t1,a7,t4 +; trap_if t1,int_ovf +; trap_ifc int_divz##(zero eq a1) +; div a0,a0,a1 +; ret + +function %f7(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; block0: +; li a1,2 +; li a2,-1 +; li a4,1 +; slli a6,a4,63 +; eq t3,a2,a1##ty=i64 +; eq t0,a6,a0##ty=i64 +; and t2,t3,t0 +; trap_if t2,int_ovf +; li a2,2 +; trap_ifc int_divz##(zero eq a2) +; li a5,2 +; div a0,a0,a5 +; ret + +function %f8(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv.i64 v0, v1 + return v2 +} + +; block0: +; trap_ifc int_divz##(zero eq a1) +; divu a0,a0,a1 +; ret + +function %f9(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = udiv.i64 v0, v1 + return v2 +} + +; block0: +; li a1,2 +; trap_ifc int_divz##(zero eq a1) +; li a3,2 +; divu a0,a0,a3 +; ret + +function %f10(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} + +; block0: +; trap_ifc int_divz##(zero eq a1) +; rem a0,a0,a1 +; ret + +function %f11(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem.i64 v0, v1 + return v2 +} + +; block0: +; trap_ifc int_divz##(zero eq a1) +; remu a0,a0,a1 +; ret + +function %f12(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv.i32 v0, v1 + return v2 +} + +; block0: +; sext.w a2,a0 +; sext.w a3,a1 +; li a5,-1 +; li a7,1 +; slli t4,a7,63 +; slli t1,a2,32 +; eq a0,a5,a3##ty=i32 +; eq a4,t4,t1##ty=i32 +; and a4,a0,a4 +; trap_if a4,int_ovf +; trap_ifc int_divz##(zero eq a3) +; divw a0,a2,a3 +; ret + +function %f13(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = sdiv.i32 v0, v1 + return v2 +} + +; block0: +; sext.w a0,a0 +; li a2,2 +; sext.w a4,a2 +; li a6,-1 +; li t3,1 +; slli t0,t3,63 +; slli t2,a0,32 +; eq a1,a6,a4##ty=i32 +; eq a3,t0,t2##ty=i32 +; and a5,a1,a3 +; trap_if a5,int_ovf +; trap_ifc int_divz##(zero eq a4) +; divw a0,a0,a4 +; ret + +function %f14(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv.i32 v0, v1 + return v2 +} + +; block0: +; uext.w a1,a1 +; trap_ifc int_divz##(zero eq a1) +; uext.w a4,a0 +; divuw a0,a4,a1 +; ret + +function %f15(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = udiv.i32 v0, v1 + return v2 +} + +; block0: +; li a1,2 +; uext.w a2,a1 +; trap_ifc int_divz##(zero eq a2) +; uext.w a5,a0 +; divuw a0,a5,a2 +; ret + +function %f16(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} + +; block0: +; sext.w a1,a1 +; trap_ifc int_divz##(zero eq a1) +; remw a0,a0,a1 +; ret + +function %f17(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem.i32 v0, v1 + return v2 +} + +; block0: +; uext.w a1,a1 +; trap_ifc int_divz##(zero eq a1) +; remuw a0,a0,a1 +; ret + +function %f18(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band.i64 v0, v1 + return v2 +} + +; block0: +; and a0,a0,a1 +; ret + +function %f19(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor.i64 v0, v1 + return v2 +} + +; block0: +; or a0,a0,a1 +; ret + +function %f20(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor.i64 v0, v1 + return v2 +} + +; block0: +; xor a0,a0,a1 +; ret + +function %f21(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band_not.i64 v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; and a0,a0,a1 +; ret + +function %f22(i64, 
i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor_not.i64 v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; or a0,a0,a1 +; ret + +function %f23(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_not.i64 v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; xor a0,a0,a1 +; ret + +function %f24(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bnot.i64 v0 + return v2 +} + +; block0: +; not a0,a0 +; ret + +function %f25(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iconst.i32 53 + v3 = ishl.i32 v0, v2 + v4 = isub.i32 v1, v3 + return v4 +} + +; block0: +; slliw a2,a0,53 +; subw a0,a1,a2 +; ret + +function %f26(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd.i32 v0, v1 + return v2 +} + +; block0: +; li a1,-1 +; addw a0,a0,a1 +; ret + +function %f27(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = isub.i32 v0, v1 + return v2 +} + +; block0: +; li a1,-1 +; subw a0,a0,a1 +; ret + +function %f28(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -1 + v2 = isub.i64 v0, v1 + return v2 +} + +; block0: +; li a1,-1 +; sub a0,a0,a1 +; ret + +function %f29(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = ineg v1 + return v2 +} + +; block0: +; li a0,1 +; sub a0,zero,a0 +; ret + +function %add_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = iadd v0, v1 + return v2 +} + +; block0: +; add a0,a0,a2 +; sltu a6,a0,a2 +; add t3,a1,a3 +; add a1,t3,a6 +; ret + +function %sub_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = isub v0, v1 + return v2 +} + +; block0: +; sub a4,a0,a2 +; mv t4,a4 +; sltu a6,a0,t4 +; sub t3,a1,a3 +; sub a1,t3,a6 +; mv a0,a4 +; ret + +function %add_mul_2(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = imul v1, v2 + v4 = iadd v3, v0 + return v4 +} + +; block0: +; mulw a3,a1,a2 +; addw a0,a3,a0 +; ret + +function %msub_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = imul v1, v2 + v4 = isub v0, v3 + return v4 +} + +; block0: +; mulw a3,a1,a2 +; subw a0,a0,a3 +; ret + +function %msub_i64(i64, i64, i64) -> i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = imul v1, v2 + v4 = isub v0, v3 + return v4 +} + +; block0: +; mul a3,a1,a2 +; sub a0,a0,a3 +; ret + +function %imul_sub_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = imul v1, v2 + v4 = isub v3, v0 + return v4 +} + +; block0: +; mulw a3,a1,a2 +; subw a0,a3,a0 +; ret + +function %imul_sub_i64(i64, i64, i64) -> i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = imul v1, v2 + v4 = isub v3, v0 + return v4 +} + +; block0: +; mul a3,a1,a2 +; sub a0,a3,a0 +; ret + +function %srem_const (i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = srem.i64 v0, v1 + return v2 +} + +; block0: +; li a1,2 +; trap_ifc int_divz##(zero eq a1) +; li a3,2 +; rem a0,a0,a3 +; ret + +function %urem_const (i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = urem.i64 v0, v1 + return v2 +} + +; block0: +; li a1,2 +; trap_ifc int_divz##(zero eq a1) +; li a3,2 +; remu a0,a0,a3 +; ret + +function %sdiv_minus_one(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -1 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; block0: +; li a1,-1 +; li a2,-1 +; li a4,1 +; slli a6,a4,63 +; eq t3,a2,a1##ty=i64 +; eq t0,a6,a0##ty=i64 +; and t2,t3,t0 +; trap_if t2,int_ovf +; li a2,-1 +; trap_ifc int_divz##(zero eq a2) +; li a5,-1 +; div a0,a0,a5 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/riscv64/atomic-rmw.clif new file mode 100644 index 000000000000..8d68abfbc4a9 --- 
/dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/atomic-rmw.clif @@ -0,0 +1,210 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %atomic_rmw_add_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return +} + +; block0: +; amoadd.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_add_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return +} + +; block0: +; amoadd.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_sub_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 sub v0, v1 + return +} + +; block0: +; sub a1,zero,a1 +; amoadd.d.aqrl a2,a1,(a0) +; ret + +function %atomic_rmw_sub_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 sub v0, v1 + return +} + +; block0: +; sub a1,zero,a1 +; amoadd.w.aqrl a2,a1,(a0) +; ret + +function %atomic_rmw_and_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return +} + +; block0: +; amoand.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_and_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return +} + +; block0: +; amoand.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_nand_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 nand v0, v1 + return +} + +; block0: +; mv a3,a0 +; mv a2,a1 +; atomic_rmw.i64 nand a0,a2,(a3)##t0=a1 offset=zero +; ret + +function %atomic_rmw_nand_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 nand v0, v1 + return +} + +; block0: +; mv a3,a0 +; mv a2,a1 +; atomic_rmw.i32 nand a0,a2,(a3)##t0=a1 offset=zero +; ret + +function %atomic_rmw_or_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return +} + +; block0: +; amoor.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_or_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return +} + +; block0: +; amoor.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_xor_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return +} + +; block0: +; amoxor.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_xor_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 xor v0, v1 + return +} + +; block0: +; amoxor.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_smax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smax v0, v1 + return +} + +; block0: +; amomax.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_smax_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smax v0, v1 + return +} + +; block0: +; amomax.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_umax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umax v0, v1 + return +} + +; block0: +; amomaxu.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_umax_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umax v0, v1 + return +} + +; block0: +; amomaxu.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_smin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smin v0, v1 + return +} + +; block0: +; amomin.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_smin_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 smin v0, v1 + return +} + +; block0: +; amomin.w.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_umin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umin v0, v1 + return +} + +; block0: +; amominu.d.aqrl a0,a1,(a0) +; ret + +function %atomic_rmw_umin_i32(i64, i32) { +block0(v0: i64, v1: i32): + v2 = atomic_rmw.i32 umin v0, v1 + return +} + +; 
block0: +; amominu.w.aqrl a0,a1,(a0) +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/atomic_load.clif b/cranelift/filetests/filetests/isa/riscv64/atomic_load.clif new file mode 100644 index 000000000000..0cfc646b707b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/atomic_load.clif @@ -0,0 +1,36 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %atomic_load_i64(i64) -> i64 { +block0(v0: i64): + v1 = atomic_load.i64 v0 + return v1 +} + +; block0: +; atomic_load.i64 a0,(a0) +; ret + +function %atomic_load_i32(i64) -> i32 { +block0(v0: i64): + v1 = atomic_load.i32 v0 + return v1 +} + +; block0: +; atomic_load.i32 a0,(a0) +; ret + +function %atomic_load_i32_i64(i64) -> i64 { +block0(v0: i64): + v1 = atomic_load.i32 v0 + v2 = uextend.i64 v1 + return v2 +} + +; block0: +; atomic_load.i32 a1,(a0) +; uext.w a0,a1 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/atomic_store.clif b/cranelift/filetests/filetests/isa/riscv64/atomic_store.clif new file mode 100644 index 000000000000..74bdcfd0b724 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/atomic_store.clif @@ -0,0 +1,76 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %atomic_store_i64(i64, i64) { +block0(v0: i64, v1: i64): + atomic_store.i64 v0, v1 + return +} + +; block0: +; atomic_store.i64 a0,(a1) +; ret + +function %atomic_store_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + atomic_store.i64 v0, v1 + return +} + +; block0: +; load_sym t2,%sym+0 +; atomic_store.i64 a0,(t2) +; ret + +function %atomic_store_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + atomic_store.i64 v1, v0 + return +} + +; block0: +; lui t2,3 +; addi t2,t2,57 +; atomic_store.i64 t2,(a0) +; ret + +function %atomic_store_i32(i32, i64) { +block0(v0: i32, v1: i64): + atomic_store.i32 v0, v1 + return +} + +; block0: +; atomic_store.i32 a0,(a1) +; ret + +function %atomic_store_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + atomic_store.i32 v0, v1 + return +} + +; block0: +; load_sym t2,%sym+0 +; atomic_store.i32 a0,(t2) +; ret + +function %atomic_store_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + atomic_store.i32 v1, v0 + return +} + +; block0: +; lui t2,3 +; addi t2,t2,57 +; atomic_store.i32 t2,(a0) +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops.clif b/cranelift/filetests/filetests/isa/riscv64/bitops.clif new file mode 100644 index 000000000000..7bf130093550 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/bitops.clif @@ -0,0 +1,929 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %a(i8) -> i8 { +block0(v0: i8): + v1 = bitrev v0 + return v1 +} + +; block0: +; brev8 a4,a0##tmp=a3 tmp2=a1 step=a2 ty=i8 +; mv a0,a4 +; ret + +function %a(i16) -> i16 { +block0(v0: i16): + v1 = bitrev v0 + return v1 +} + +; block0: +; mv t3,a0 +; brev8 a3,t3##tmp=a0 tmp2=a1 step=a2 ty=i16 +; rev8 a5,a3##step=a7 tmp=a6 +; srli a0,a5,48 +; ret + +function %a(i32) -> i32 { +block0(v0: i32): + v1 = bitrev v0 + return v1 +} + +; block0: +; mv t3,a0 +; brev8 a3,t3##tmp=a0 tmp2=a1 step=a2 ty=i32 +; rev8 a5,a3##step=a7 tmp=a6 +; srli a0,a5,32 +; ret + +function %a(i64) -> i64 { +block0(v0: i64): + v1 = bitrev v0 + return v1 +} + +; block0: +; rev8 a3,a0##step=a2 tmp=a1 +; brev8 a0,a3##tmp=a4 tmp2=a5 step=a6 ty=i64 +; ret + +function %a(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} 
+ +; block0: +; rev8 a2,a0##step=a4 tmp=a3 +; brev8 t4,a2##tmp=a6 tmp2=a7 step=t3 ty=i64 +; rev8 t1,a1##step=a0 tmp=t2 +; brev8 a0,t1##tmp=a2 tmp2=a3 step=a4 ty=i64 +; mv a1,t4 +; ret + +function %b(i8) -> i8 { +block0(v0: i8): + v1 = clz v0 + return v1 +} + +; block0: +; clz a3,a0##ty=i8 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %b(i16) -> i16 { +block0(v0: i16): + v1 = clz v0 + return v1 +} + +; block0: +; clz a3,a0##ty=i16 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %b(i32) -> i32 { +block0(v0: i32): + v1 = clz v0 + return v1 +} + +; block0: +; clz a3,a0##ty=i32 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %b(i64) -> i64 { +block0(v0: i64): + v1 = clz v0 + return v1 +} + +; block0: +; clz a3,a0##ty=i64 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %b(i128) -> i128 { +block0(v0: i128): + v1 = clz v0 + return v1 +} + +; block0: +; clz a4,a1##ty=i64 tmp=a2 step=a3 +; clz t3,a0##ty=i64 tmp=a6 step=a7 +; li t0,64 +; select_reg t2,t3,zero##condition=(t0 eq a4) +; add a0,a4,t2 +; mv a1,zero +; ret + +function %c(i8) -> i8 { +block0(v0: i8): + v1 = cls v0 + return v1 +} + +; block0: +; sext.b a1,a0 +; not a2,a0 +; select_reg a4,a2,a0##condition=(a1 slt zero) +; clz t3,a4##ty=i8 tmp=a6 step=a7 +; addi a0,t3,-1 +; ret + +function %c(i16) -> i16 { +block0(v0: i16): + v1 = cls v0 + return v1 +} + +; block0: +; sext.h a1,a0 +; not a2,a0 +; select_reg a4,a2,a0##condition=(a1 slt zero) +; clz t3,a4##ty=i16 tmp=a6 step=a7 +; addi a0,t3,-1 +; ret + +function %c(i32) -> i32 { +block0(v0: i32): + v1 = cls v0 + return v1 +} + +; block0: +; sext.w a1,a0 +; not a2,a0 +; select_reg a4,a2,a0##condition=(a1 slt zero) +; clz t3,a4##ty=i32 tmp=a6 step=a7 +; addi a0,t3,-1 +; ret + +function %c(i64) -> i64 { +block0(v0: i64): + v1 = cls v0 + return v1 +} + +; block0: +; not a1,a0 +; select_reg a2,a1,a0##condition=(a0 slt zero) +; clz a6,a2##ty=i64 tmp=a4 step=a5 +; addi a0,a6,-1 +; ret + +function %c(i128) -> i128 { +block0(v0: i128): + v1 = cls v0 + return v1 +} + +; block0: +; not a2,a0 +; select_reg a4,a2,a0##condition=(a1 slt zero) +; not a6,a1 +; select_reg t3,a6,a1##condition=(a1 slt zero) +; clz t2,t3##ty=i64 tmp=t0 step=t1 +; clz a3,a4##ty=i64 tmp=a1 step=a2 +; li a5,64 +; select_reg a7,a3,zero##condition=(a5 eq t2) +; add t4,t2,a7 +; addi a0,t4,-1 +; mv a1,zero +; ret + +function %d(i8) -> i8 { +block0(v0: i8): + v1 = ctz v0 + return v1 +} + +; block0: +; ctz a3,a0##ty=i8 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i16) -> i16 { +block0(v0: i16): + v1 = ctz v0 + return v1 +} + +; block0: +; ctz a3,a0##ty=i16 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i32) -> i32 { +block0(v0: i32): + v1 = ctz v0 + return v1 +} + +; block0: +; ctz a3,a0##ty=i32 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i64) -> i64 { +block0(v0: i64): + v1 = ctz v0 + return v1 +} + +; block0: +; ctz a3,a0##ty=i64 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i128) -> i128 { +block0(v0: i128): + v1 = ctz v0 + return v1 +} + +; block0: +; ctz a4,a0##ty=i64 tmp=a2 step=a3 +; ctz t3,a1##ty=i64 tmp=a6 step=a7 +; li t0,64 +; select_reg t2,t3,zero##condition=(t0 eq a4) +; add a0,a4,t2 +; mv a1,zero +; ret + +function %d(i128) -> i128 { +block0(v0: i128): + v1 = popcnt v0 + return v1 +} + +; block0: +; popcnt a4,a0##ty=i64 tmp=a2 step=a3 +; popcnt t3,a1##ty=i64 tmp=a6 step=a7 +; add a0,a4,t3 +; mv a1,zero +; ret + +function %d(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} + +; block0: +; popcnt a3,a0##ty=i64 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 
+ return v1 +} + +; block0: +; popcnt a3,a0##ty=i32 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i16) -> i16 { +block0(v0: i16): + v1 = popcnt v0 + return v1 +} + +; block0: +; popcnt a3,a0##ty=i16 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %d(i8) -> i8 { +block0(v0: i8): + v1 = popcnt v0 + return v1 +} + +; block0: +; popcnt a3,a0##ty=i8 tmp=a2 step=a1 +; mv a0,a3 +; ret + +function %bextend_b8() -> b32 { +block0: + v1 = bconst.b8 true + v2 = bextend.b32 v1 + return v2 +} + +; block0: +; li a0,-1 +; ret + +function %bextend_b1() -> b32 { +block0: + v1 = bconst.b1 true + v2 = bextend.b32 v1 + return v2 +} + +; block0: +; li a0,-1 +; ret + +function %bnot_i32(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + return v1 +} + +; block0: +; not a0,a0 +; ret + +function %bnot_i64(i64) -> i64 { +block0(v0: i64): + v1 = bnot v0 + return v1 +} + +; block0: +; not a0,a0 +; ret + +function %bnot_i64_with_shift(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = ishl.i64 v0, v1 + v3 = bnot v2 + return v3 +} + +; block0: +; slli a1,a0,3 +; not a0,a1 +; ret + +function %bnot_i128(i128) -> i128 { +block0(v0: i128): + v1 = bnot v0 + return v1 +} + +; block0: +; not a0,a0 +; not a1,a1 +; ret + +function %band_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band v0, v1 + return v2 +} + +; block0: +; and a0,a0,a1 +; ret + +function %band_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band v0, v1 + return v2 +} + +; block0: +; and a0,a0,a1 +; ret + +function %band_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band v0, v1 + return v2 +} + +; block0: +; and a0,a0,a2 +; and a1,a1,a3 +; ret + +function %band_i64_constant(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = band v0, v1 + return v2 +} + +; block0: +; andi a0,a0,3 +; ret + +function %band_i64_constant2(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = band v1, v0 + return v2 +} + +; block0: +; andi a0,a0,3 +; ret + +function %band_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = band v0, v3 + return v4 +} + +; block0: +; slli a2,a1,3 +; and a0,a0,a2 +; ret + +function %band_i64_constant_shift2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = band v3, v0 + return v4 +} + +; block0: +; slli a2,a1,3 +; and a0,a2,a0 +; ret + +function %bor_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor v0, v1 + return v2 +} + +; block0: +; or a0,a0,a1 +; ret + +function %bor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor v0, v1 + return v2 +} + +; block0: +; or a0,a0,a1 +; ret + +function %bor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bor v0, v1 + return v2 +} + +; block0: +; or a0,a0,a2 +; or a1,a1,a3 +; ret + +function %bor_i64_constant(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = bor v0, v1 + return v2 +} + +; block0: +; ori a0,a0,3 +; ret + +function %bor_i64_constant2(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = bor v1, v0 + return v2 +} + +; block0: +; ori a0,a0,3 +; ret + +function %bor_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = bor v0, v3 + return v4 +} + +; block0: +; slli a2,a1,3 +; or a0,a0,a2 +; ret + +function %bor_i64_constant_shift2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = bor v3, v0 + return v4 +} + +; block0: +; slli a2,a1,3 +; or a0,a2,a0 +; ret + +function %bxor_i32(i32, i32) 
-> i32 { +block0(v0: i32, v1: i32): + v2 = bxor v0, v1 + return v2 +} + +; block0: +; xor a0,a0,a1 +; ret + +function %bxor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor v0, v1 + return v2 +} + +; block0: +; xor a0,a0,a1 +; ret + +function %bxor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor v0, v1 + return v2 +} + +; block0: +; xor a0,a0,a2 +; xor a1,a1,a3 +; ret + +function %bxor_i64_constant(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = bxor v0, v1 + return v2 +} + +; block0: +; xori a0,a0,3 +; ret + +function %bxor_i64_constant2(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = bxor v1, v0 + return v2 +} + +; block0: +; xori a0,a0,3 +; ret + +function %bxor_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = bxor v0, v3 + return v4 +} + +; block0: +; slli a2,a1,3 +; xor a0,a0,a2 +; ret + +function %bxor_i64_constant_shift2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 3 + v3 = ishl.i64 v1, v2 + v4 = bxor v3, v0 + return v4 +} + +; block0: +; slli a2,a1,3 +; xor a0,a2,a0 +; ret + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; and a0,a0,a1 +; ret + +function %band_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; and a0,a0,a1 +; ret + +function %band_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band_not v0, v1 + return v2 +} + +; block0: +; not a4,a2 +; and a0,a0,a4 +; not t3,a3 +; and a1,a1,t3 +; ret + +function %band_not_i64_constant(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 4 + v2 = band_not v0, v1 + return v2 +} + +; block0: +; li a1,4 +; not a2,a1 +; and a0,a0,a2 +; ret + +function %band_not_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 4 + v3 = ishl.i64 v1, v2 + v4 = band_not v0, v3 + return v4 +} + +; block0: +; slli a3,a1,4 +; not a2,a3 +; and a0,a0,a2 +; ret + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; or a0,a0,a1 +; ret + +function %bor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; or a0,a0,a1 +; ret + +function %bor_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bor_not v0, v1 + return v2 +} + +; block0: +; not a4,a2 +; or a0,a0,a4 +; not t3,a3 +; or a1,a1,t3 +; ret + +function %bor_not_i64_constant(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 4 + v2 = bor_not v0, v1 + return v2 +} + +; block0: +; li a1,4 +; not a2,a1 +; or a0,a0,a2 +; ret + +function %bor_not_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 4 + v3 = ishl.i64 v1, v2 + v4 = bor_not v0, v3 + return v4 +} + +; block0: +; slli a3,a1,4 +; not a2,a3 +; or a0,a0,a2 +; ret + +function %bxor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; xor a0,a0,a1 +; ret + +function %bxor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_not v0, v1 + return v2 +} + +; block0: +; not a1,a1 +; xor a0,a0,a1 +; ret + +function %bxor_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor_not v0, v1 + return v2 +} + +; block0: +; not a4,a2 +; xor a0,a0,a4 +; not t3,a3 +; xor a1,a1,t3 +; ret + +function %bxor_not_i64_constant(i64) -> i64 { +block0(v0: 
i64): + v1 = iconst.i64 4 + v2 = bxor_not v0, v1 + return v2 +} + +; block0: +; li a1,4 +; not a2,a1 +; xor a0,a0,a2 +; ret + +function %bxor_not_i64_constant_shift(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iconst.i64 4 + v3 = ishl.i64 v1, v2 + v4 = bxor_not v0, v3 + return v4 +} + +; block0: +; slli a3,a1,4 +; not a2,a3 +; xor a0,a0,a2 +; ret + +function %ishl_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ishl.i128 v0, v1 + return v2 +} + +; block0: +; andi a3,a2,127 +; li a5,128 +; sub a5,a5,a3 +; sll t3,a0,a3 +; srl t0,a0,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; sll a1,a1,a3 +; or a4,t2,a1 +; li a5,64 +; select_reg a0,zero,t3##condition=(a3 uge a5) +; select_reg a1,t3,a4##condition=(a3 uge a5) +; ret + +function %ishl_i128_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ishl.i128 v0, v1 + return v2 +} + +; block0: +; andi a4,a2,127 +; li a6,128 +; sub a6,a6,a4 +; sll t4,a0,a4 +; srl t1,a0,a6 +; select_reg a0,zero,t1##condition=(a4 eq zero) +; sll a2,a1,a4 +; or a5,a0,a2 +; li a6,64 +; select_reg a0,zero,t4##condition=(a4 uge a6) +; select_reg a1,t4,a5##condition=(a4 uge a6) +; ret + +function %ushr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ushr.i128 v0, v1 + return v2 +} + +; block0: +; andi a3,a2,127 +; li a5,128 +; sub a5,a5,a3 +; sll t3,a1,a5 +; select_reg t0,zero,t3##condition=(a3 eq zero) +; srl t2,a0,a3 +; or a2,t0,t2 +; li a4,64 +; srl a5,a1,a3 +; select_reg a0,a5,a2##condition=(a3 uge a4) +; select_reg a1,zero,a5##condition=(a3 uge a4) +; ret + +function %ushr_i128_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = ushr.i128 v0, v1 + return v2 +} + +; block0: +; andi a4,a2,127 +; li a6,128 +; sub a6,a6,a4 +; sll t4,a1,a6 +; select_reg t1,zero,t4##condition=(a4 eq zero) +; srl a0,a0,a4 +; or a2,t1,a0 +; li a5,64 +; srl a6,a1,a4 +; select_reg a0,a6,a2##condition=(a4 uge a5) +; select_reg a1,zero,a6##condition=(a4 uge a5) +; ret + +function %sshr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = sshr.i128 v0, v1 + return v2 +} + +; block0: +; andi a3,a2,127 +; li a5,128 +; sub a5,a5,a3 +; sll t3,a1,a5 +; select_reg t0,zero,t3##condition=(a3 eq zero) +; srl t2,a0,a3 +; or a2,t0,t2 +; li a4,64 +; sra a5,a1,a3 +; li a7,-1 +; select_reg t4,a7,zero##condition=(a1 slt zero) +; select_reg a0,a5,a2##condition=(a3 uge a4) +; select_reg a1,t4,a5##condition=(a3 uge a4) +; ret + +function %sshr_i128_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = sshr.i128 v0, v1 + return v2 +} + +; block0: +; andi a4,a2,127 +; li a6,128 +; sub a6,a6,a4 +; sll t4,a1,a6 +; select_reg t1,zero,t4##condition=(a4 eq zero) +; srl a0,a0,a4 +; or a2,t1,a0 +; li a5,64 +; sra a6,a1,a4 +; li t3,-1 +; select_reg t0,t3,zero##condition=(a1 slt zero) +; select_reg a0,a6,a2##condition=(a4 uge a5) +; select_reg a1,t0,a6##condition=(a4 uge a5) +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/call-indirect.clif b/cranelift/filetests/filetests/isa/riscv64/call-indirect.clif new file mode 100644 index 000000000000..64b2a5fc2e34 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/call-indirect.clif @@ -0,0 +1,22 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(i64, i64) -> i64 { + sig0 = (i64) -> i64 +block0(v0: i64, v1: i64): + v2 = call_indirect.i64 sig0, v1(v0) + return v2 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; callind a1 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + diff --git 
a/cranelift/filetests/filetests/isa/riscv64/call.clif b/cranelift/filetests/filetests/isa/riscv64/call.clif new file mode 100644 index 000000000000..ccc7ad299b78 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/call.clif @@ -0,0 +1,424 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f1(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; load_sym a1,%g+0 +; callind a1 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f2(i32) -> i64 { + fn0 = %g(i32 uext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; uext.w a0,a0 +; load_sym a3,%g+0 +; callind a3 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f3(i32) -> i32 uext { +block0(v0: i32): + return v0 +} + +; block0: +; uext.w a0,a0 +; ret + +function %f4(i32) -> i64 { + fn0 = %g(i32 sext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; sext.w a0,a0 +; load_sym a3,%g+0 +; callind a3 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f5(i32) -> i32 sext { +block0(v0: i32): + return v0 +} + +; block0: +; sext.w a0,a0 +; ret + +function %f6(i8) -> i64 { + fn0 = %g(i32, i32, i32, i32, i32, i32, i32, i32, i8 sext) -> i64 + +block0(v0: i8): + v1 = iconst.i32 42 + v2 = call fn0(v1, v1, v1, v1, v1, v1, v1, v1, v0) + return v2 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv t3,a0 +; add sp,-16 +; virtual_sp_offset_adj +16 +; li a0,42 +; li a1,42 +; li a2,42 +; li a3,42 +; li a4,42 +; li a5,42 +; li a6,42 +; li a7,42 +; sext.b t3,t3 +; sd t3,0(sp) +; load_sym t4,%g+0 +; callind t4 +; add sp,+16 +; virtual_sp_offset_adj -16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f7(i8) -> i32, i32, i32, i32, i32, i32, i32, i32, i8 sext { +block0(v0: i8): + v1 = iconst.i32 42 + return v1, v1, v1, v1, v1, v1, v1, v1, v0 +} + +; block0: +; mv a7,a0 +; mv a6,a1 +; li a0,42 +; li a1,42 +; li a2,42 +; li a5,42 +; li t3,42 +; li t1,42 +; li a3,42 +; li a4,42 +; mv t2,a7 +; mv t0,a6 +; sw a2,0(t0) +; sw a5,8(t0) +; sw t3,16(t0) +; sw t1,24(t0) +; sw a3,32(t0) +; sw a4,40(t0) +; sext.b t2,t2 +; sd t2,48(t0) +; ret + +function %f8() { + fn0 = %g0() -> f32 + fn1 = %g1() -> f64 + fn2 = %g2() + fn3 = %g3(f32) + fn4 = %g4(f64) + +block0: + v0 = call fn0() + v1 = call fn1() + v2 = call fn1() + call fn2() + call fn3(v0) + call fn4(v1) + call fn4(v2) + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; fsd fs2,-8(sp) +; fsd fs3,-16(sp) +; fsd fs11,-24(sp) +; add sp,-32 +; block0: +; load_sym a6,%g0+0 +; callind a6 +; fmv.d fs11,fa0 +; load_sym a6,%g1+0 +; callind a6 +; fmv.d fs2,fa0 +; load_sym a6,%g1+0 +; callind a6 +; fmv.d fs3,fa0 +; load_sym a6,%g2+0 +; callind a6 +; load_sym a7,%g3+0 +; fmv.d fa0,fs11 +; callind a7 +; load_sym t3,%g4+0 +; fmv.d fa0,fs2 +; callind t3 +; load_sym t4,%g4+0 +; fmv.d fa0,fs3 +; callind t4 +; add sp,+32 +; fld fs2,-8(sp) +; fld fs3,-16(sp) +; fld fs11,-24(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f11(i128, i64) -> i64 { +block0(v0: i128, v1: i64): + v2, v3 = isplit v0 + return v3 +} + +; block0: +; mv a2,a0 +; mv a0,a1 +; ret + +function %f11_call(i64) -> i64 { + fn0 = %f11(i128, i64) -> i64 + +block0(v0: i64): + v1 = iconst.i64 42 + v2 = iconcat v1, v0 + v3 = call fn0(v2, v1) + return v3 
+} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv a6,a0 +; li a0,42 +; mv a1,a6 +; li a2,42 +; load_sym a6,%f11+0 +; callind a6 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f12(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2, v3 = isplit v1 + return v2 +} + +; block0: +; mv a0,a1 +; ret + +function %f12_call(i64) -> i64 { + fn0 = %f12(i64, i128) -> i64 + +block0(v0: i64): + v1 = iconst.i64 42 + v2 = iconcat v0, v1 + v3 = call fn0(v1, v2) + return v3 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv a1,a0 +; li a2,42 +; li a0,42 +; load_sym a6,%f12+0 +; callind a6 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f13(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2, v3 = isplit v1 + return v2 +} + +; block0: +; mv a0,a1 +; ret + +function %f13_call(i64) -> i64 { + fn0 = %f13(i64, i128) -> i64 + +block0(v0: i64): + v1 = iconst.i64 42 + v2 = iconcat v0, v1 + v3 = call fn0(v1, v2) + return v3 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv a1,a0 +; li a2,42 +; li a0,42 +; load_sym a6,%f13+0 +; callind a6 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f14(i128, i128, i128, i64, i128) -> i128 { +block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): + return v4 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; ld a1,16(fp) +; mv a0,a7 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f14_call(i128, i64) -> i128 { + fn0 = %f14(i128, i128, i128, i64, i128) -> i128 + +block0(v0: i128, v1: i64): + v2 = call fn0(v0, v0, v0, v1, v0) + return v2 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv a7,a0 +; mv a6,a2 +; add sp,-16 +; virtual_sp_offset_adj +16 +; sd a1,0(sp) +; mv a5,a1 +; load_sym t3,%f14+0 +; mv a1,a5 +; mv a3,a5 +; mv a0,a7 +; mv a2,a7 +; mv a4,a7 +; callind t3 +; add sp,+16 +; virtual_sp_offset_adj -16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f15(i128, i128, i128, i64, i128) -> i128{ +block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): + return v4 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; ld a1,16(fp) +; mv a0,a7 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f15_call(i128, i64) -> i128 { + fn0 = %f15(i128, i128, i128, i64, i128) -> i128 + +block0(v0: i128, v1: i64): + v2 = call fn0(v0, v0, v0, v1, v0) + return v2 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; mv a7,a0 +; mv a6,a2 +; add sp,-16 +; virtual_sp_offset_adj +16 +; sd a1,0(sp) +; mv a5,a1 +; load_sym t3,%f15+0 +; mv a1,a5 +; mv a3,a5 +; mv a0,a7 +; mv a2,a7 +; mv a4,a7 +; callind t3 +; add sp,+16 +; virtual_sp_offset_adj -16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f16() -> i32, i32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + return v0, v1 +} + +; block0: +; li a0,0 +; li a1,1 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/condbr.clif b/cranelift/filetests/filetests/isa/riscv64/condbr.clif new file mode 100644 index 000000000000..0142b3842371 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/condbr.clif @@ -0,0 +1,391 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + return v2 +} + +; block0: +; eq a0,a0,a1##ty=i64 +; ret + +function %icmp_eq_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp eq v0, v1 + return v2 +} + +; block0: +; eq 
a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_ne_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp ne v0, v1 + return v2 +} + +; block0: +; ne a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_slt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp slt v0, v1 + return v2 +} + +; block0: +; slt a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_ult_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp ult v0, v1 + return v2 +} + +; block0: +; ult a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_sle_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp sle v0, v1 + return v2 +} + +; block0: +; sle a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_ule_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp ule v0, v1 + return v2 +} + +; block0: +; ule a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_sgt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp sgt v0, v1 + return v2 +} + +; block0: +; sgt a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_ugt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp ugt v0, v1 + return v2 +} + +; block0: +; ugt a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_sge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp sge v0, v1 + return v2 +} + +; block0: +; sge a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %icmp_uge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp uge v0, v1 + return v2 +} + +; block0: +; uge a0,[a0,a1],[a2,a3]##ty=i128 +; ret + +function %f(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ifcmp v0, v1 + brif eq v2, block1 + jump block2 + +block1: + v4 = iconst.i64 1 + return v4 + +block2: + v5 = iconst.i64 2 + return v5 +} + +; block0: +; eq a3,a0,a1##ty=i64 +; bne a3,zero,taken(label1),not_taken(label2) +; block1: +; li a0,1 +; ret +; block2: +; li a0,2 +; ret + +function %f(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ifcmp v0, v1 + brif eq v2, block1 + jump block1 + +block1: + v4 = iconst.i64 1 + return v4 +} + +; block0: +; eq a2,a0,a1##ty=i64 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; li a0,1 +; ret + +function %i128_brz(i128){ +block0(v0: i128): + brz v0, block1 + jump block1 + +block1: + nop + return +} + +; block0: +; bne a1,zero,taken(label2),not_taken(0) +; beq a0,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_brnz(i128){ +block0(v0: i128): + brnz v0, block1 + jump block1 + +block1: + nop + return +} + +; block0: +; bne a1,zero,taken(label1),not_taken(0) +; bne a0,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_eq(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp eq v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; eq a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_ne(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ne v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; ne a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_slt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp slt v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; slt a2,[a0,a1],[a2,a3]##ty=i128 +; bne 
a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_ult(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ult v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; ult a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_sle(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sle v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; sle a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_ule(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ule v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; ule a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_sgt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sgt v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; sgt a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_ugt(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp ugt v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; ugt a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_sge(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp sge v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; sge a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + +function %i128_bricmp_uge(i128, i128) { +block0(v0: i128, v1: i128): + br_icmp uge v0, v1, block1 + jump block1 + +block1: + return +} + +; block0: +; uge a2,[a0,a1],[a2,a3]##ty=i128 +; bne a2,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/condops.clif b/cranelift/filetests/filetests/isa/riscv64/condops.clif new file mode 100644 index 000000000000..b1594909bfe5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/condops.clif @@ -0,0 +1,86 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(i8, i64, i64) -> i64 { +block0(v0: i8, v1: i64, v2: i64): + v3 = iconst.i8 42 + v4 = ifcmp v0, v3 + v5 = selectif.i64 eq v4, v1, v2 + return v5 +} + +; block0: +; li a3,42 +; uext.b a5,a0 +; uext.b a7,a3 +; eq t4,a5,a7##ty=i8 +; selectif a0,a1,a2##test=t4 +; ret + +function %g(i8) -> b1 { +block0(v0: i8): + v3 = iconst.i8 42 + v4 = ifcmp v0, v3 + v5 = trueif eq v4 + return v5 +} + +; block0: +; mv a5,a0 +; li a0,42 +; uext.b a2,a5 +; uext.b a4,a0 +; eq a0,a2,a4##ty=i8 +; ret + +function %h(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = bitselect.i8 v0, v1, v2 + return v3 +} + +; block0: +; mv t3,a2 +; and a2,a0,a1 +; not a4,a0 +; and a6,a4,t3 +; or a0,a2,a6 +; ret + +function %i(b1, i8, i8) -> i8 { +block0(v0: b1, v1: i8, v2: i8): + v3 = select.i8 v0, v1, v2 + return v3 +} + +; block0: +; select_i8 a0,a1,a2##condition=a0 +; ret + +function %i(i32, i8, i8) -> i8 { +block0(v0: i32, v1: i8, v2: i8): + v3 = iconst.i32 42 + v4 = icmp.i32 eq v0, v3 + v5 = select.i8 v4, v1, v2 + return v5 +} + 
+; block0: +; li a3,42 +; uext.w a5,a0 +; uext.w a7,a3 +; eq t4,a5,a7##ty=i32 +; select_i8 a0,a1,a2##condition=t4 +; ret + +function %i128_select(b1, i128, i128) -> i128 { +block0(v0: b1, v1: i128, v2: i128): + v3 = select.i128 v0, v1, v2 + return v3 +} + +; block0: +; select_i128 [a0,a1],[a1,a2],[a3,a4]##condition=a0 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/constants.clif b/cranelift/filetests/filetests/isa/riscv64/constants.clif new file mode 100644 index 000000000000..39f0a095b6cd --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/constants.clif @@ -0,0 +1,328 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f() -> b8 { +block0: + v0 = bconst.b8 true + return v0 +} + +; block0: +; li a0,-1 +; ret + +function %f() -> b16 { +block0: + v0 = bconst.b16 false + return v0 +} + +; block0: +; li a0,0 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0 + return v0 +} + +; block0: +; li a0,0 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff + return v0 +} + +; block0: +; lui a0,16 +; addi a0,a0,4095 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff0000 + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffff0000 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff00000000 + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffff00000000 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff000000000000 + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffff000000000000 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffffffff + return v0 +} + +; block0: +; li a0,-1 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffff0000 + return v0 +} + +; block0: +; lui a0,1048560 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffff0000ffff + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffffffff0000ffff +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff0000ffffffff + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffff0000ffffffff +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0x0000ffffffffffff + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xffffffffffff +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xf34bf0a31212003a ;; random digits + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xf34bf0a31212003a +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0x12e900001ef40000 ;; random digits with 2 clear half words + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0x12e900001ef40000 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0x12e9ffff1ef4ffff ;; random digits with 2 full half words + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0x12e9ffff1ef4ffff +; ret + +function %f() -> i32 { +block0: + v0 = iconst.i32 -1 + return v0 +} + +; block0: +; li a0,-1 +; ret + +function %f() -> i32 { +block0: + v0 = iconst.i32 0xfffffff7 + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xfffffff7 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xfffffff7 + return v0 +} + +; block0: +; auipc a0,0 +; ld a0,12(a0) +; j 12 +; .8byte 0xfffffff7 +; ret + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xfffffffffffffff7 + return v0 +} + +; block0: +; li a0,-9 +; ret + +function %f() -> f64 { 
+block0: + v0 = f64const 0x1.0 + return v0 +} + +; block0: +; auipc t2,0 +; ld t2,12(t2) +; j 12 +; .8byte 0x3ff0000000000000 +; fmv.d.x fa0,t2 +; ret + +function %f() -> f32 { +block0: + v0 = f32const 0x5.0 + return v0 +} + +; block0: +; lui t2,264704 +; fmv.w.x fa0,t2 +; ret + +function %f() -> f64 { +block0: + v0 = f64const 0x32.0 + return v0 +} + +; block0: +; auipc t2,0 +; ld t2,12(t2) +; j 12 +; .8byte 0x4049000000000000 +; fmv.d.x fa0,t2 +; ret + +function %f() -> f32 { +block0: + v0 = f32const 0x32.0 + return v0 +} + +; block0: +; lui t2,271488 +; fmv.w.x fa0,t2 +; ret + +function %f() -> f64 { +block0: + v0 = f64const 0x0.0 + return v0 +} + +; block0: +; li t2,0 +; fmv.d.x fa0,t2 +; ret + +function %f() -> f32 { +block0: + v0 = f32const 0x0.0 + return v0 +} + +; block0: +; li t2,0 +; fmv.w.x fa0,t2 +; ret + +function %f() -> f64 { +block0: + v0 = f64const -0x10.0 + return v0 +} + +; block0: +; auipc t2,0 +; ld t2,12(t2) +; j 12 +; .8byte 0xc030000000000000 +; fmv.d.x fa0,t2 +; ret + +function %f() -> f32 { +block0: + v0 = f32const -0x10.0 + return v0 +} + +; block0: +; auipc t2,0 +; lwu t2,12(t2) +; j 8 +; .4byte 0xc1800000 +; fmv.w.x fa0,t2 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/extend-op.clif b/cranelift/filetests/filetests/isa/riscv64/extend-op.clif new file mode 100644 index 000000000000..26588ad12a45 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/extend-op.clif @@ -0,0 +1,119 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(i8) -> i64 { +block0(v0: i8): + v1 = sextend.i64 v0 + v2 = iconst.i64 42 + v3 = iadd.i64 v2, v1 + return v3 +} + +; block0: +; sext.b a1,a0 +; addi a0,a1,42 +; ret + +function %f2(i8, i64) -> i64 { +block0(v0: i8, v1: i64): + v2 = sextend.i64 v0 + v3 = iadd.i64 v2, v1 + return v3 +} + +; block0: +; sext.b a2,a0 +; add a0,a2,a1 +; ret + +function %i128_uextend_i64(i64) -> i128 { +block0(v0: i64): + v1 = uextend.i128 v0 + return v1 +} + +; block0: +; mv a1,zero +; ret + +function %i128_sextend_i64(i64) -> i128 { +block0(v0: i64): + v1 = sextend.i128 v0 + return v1 +} + +; block0: +; slt a1,a0,zero +; sext.b1 a1,a1 +; ret + +function %i128_uextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = uextend.i128 v0 + return v1 +} + +; block0: +; uext.w a0,a0 +; mv a1,zero +; ret + +function %i128_sextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = sextend.i128 v0 + return v1 +} + +; block0: +; sext.w a1,a0 +; slt a3,a1,zero +; sext.b1 a1,a3 +; ret + +function %i128_uextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = uextend.i128 v0 + return v1 +} + +; block0: +; uext.h a0,a0 +; mv a1,zero +; ret + +function %i128_sextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = sextend.i128 v0 + return v1 +} + +; block0: +; sext.h a1,a0 +; slt a3,a1,zero +; sext.b1 a1,a3 +; ret + +function %i128_uextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = uextend.i128 v0 + return v1 +} + +; block0: +; uext.b a0,a0 +; mv a1,zero +; ret + +function %i128_sextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = sextend.i128 v0 + return v1 +} + +; block0: +; sext.b a1,a0 +; slt a3,a1,zero +; sext.b1 a1,a3 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif b/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif new file mode 100644 index 000000000000..a5b5b758fd91 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/fcvt-small.clif @@ -0,0 +1,84 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function u0:0(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_uint.f32 v0 
+ return v1 +} + +; block0: +; fcvt.s.lu fa0,a0 +; ret + +function u0:0(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.lu fa0,a0 +; ret + +function u0:0(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; fcvt.s.lu fa0,a0 +; ret + +function u0:0(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.lu fa0,a0 +; ret + +function u0:0(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i8 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function u0:0(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i8 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function u0:0(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i16 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function u0:0(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i16 a0,fa0##in_ty=f64 tmp=ft4 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/float.clif b/cranelift/filetests/filetests/isa/riscv64/float.clif new file mode 100644 index 000000000000..e231d4fde612 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/float.clif @@ -0,0 +1,576 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f1(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fadd v0, v1 + return v2 +} + +; block0: +; fadd.s fa0,fa0,fa1 +; ret + +function %f2(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fadd v0, v1 + return v2 +} + +; block0: +; fadd.d fa0,fa0,fa1 +; ret + +function %f3(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fsub v0, v1 + return v2 +} + +; block0: +; fsub.s fa0,fa0,fa1 +; ret + +function %f4(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fsub v0, v1 + return v2 +} + +; block0: +; fsub.d fa0,fa0,fa1 +; ret + +function %f5(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmul v0, v1 + return v2 +} + +; block0: +; fmul.s fa0,fa0,fa1 +; ret + +function %f6(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmul v0, v1 + return v2 +} + +; block0: +; fmul.d fa0,fa0,fa1 +; ret + +function %f7(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fdiv v0, v1 + return v2 +} + +; block0: +; fdiv.s fa0,fa0,fa1 +; ret + +function %f8(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fdiv v0, v1 + return v2 +} + +; block0: +; fdiv.d fa0,fa0,fa1 +; ret + +function %f9(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} + +; block0: +; fmin.s ft4,fa0,fa1##tmp=a2 ty=f32 +; fmv.d fa0,ft4 +; ret + +function %f10(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} + +; block0: +; fmin.d ft4,fa0,fa1##tmp=a2 ty=f64 +; fmv.d fa0,ft4 +; ret + +function %f11(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} + +; block0: +; fmax.s ft4,fa0,fa1##tmp=a2 ty=f32 +; fmv.d fa0,ft4 +; ret + +function %f12(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} + +; block0: +; fmax.d ft4,fa0,fa1##tmp=a2 ty=f64 +; fmv.d fa0,ft4 +; ret + +function %f13(f32) -> f32 { +block0(v0: f32): + v1 = sqrt v0 + return v1 +} + +; block0: +; fsqrt.s fa0,fa0 +; ret + +function %f15(f64) -> f64 { +block0(v0: f64): + v1 = sqrt v0 + return v1 +} + +; block0: +; fsqrt.d fa0,fa0 +; ret + +function %f16(f32) -> f32 { +block0(v0: f32): + v1 = fabs v0 + return v1 +} + +; block0: +; fabs.s fa0,fa0 +; ret + 
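+;; Note: `fabs.s`/`fabs.d` and `fneg.s`/`fneg.d` in the expected output below are
+;; the standard RISC-V pseudo-instructions; assuming the usual assembler
+;; expansion, they encode as the sign-injection forms, e.g.
+;; `fsgnjx.s fa0,fa0,fa0` for `fabs.s fa0,fa0` and
+;; `fsgnjn.s fa0,fa0,fa0` for `fneg.s fa0,fa0`.
+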
+function %f17(f64) -> f64 { +block0(v0: f64): + v1 = fabs v0 + return v1 +} + +; block0: +; fabs.d fa0,fa0 +; ret + +function %f18(f32) -> f32 { +block0(v0: f32): + v1 = fneg v0 + return v1 +} + +; block0: +; fneg.s fa0,fa0 +; ret + +function %f19(f64) -> f64 { +block0(v0: f64): + v1 = fneg v0 + return v1 +} + +; block0: +; fneg.d fa0,fa0 +; ret + +function %f20(f32) -> f64 { +block0(v0: f32): + v1 = fpromote.f64 v0 + return v1 +} + +; block0: +; fcvt.d.s fa0,fa0 +; ret + +function %f21(f64) -> f32 { +block0(v0: f64): + v1 = fdemote.f32 v0 + return v1 +} + +; block0: +; fcvt.s.d fa0,fa0 +; ret + +function %f22(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +; block0: +; ceil ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32 +; fmv.d fa0,ft3 +; ret + +function %f22(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +; block0: +; ceil ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64 +; fmv.d fa0,ft3 +; ret + +function %f23(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} + +; block0: +; floor ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32 +; fmv.d fa0,ft3 +; ret + +function %f24(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +; block0: +; floor ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64 +; fmv.d fa0,ft3 +; ret + +function %f25(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +; block0: +; trunc ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32 +; fmv.d fa0,ft3 +; ret + +function %f26(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +; block0: +; trunc ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64 +; fmv.d fa0,ft3 +; ret + +function %f27(f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; block0: +; nearest ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f32 +; fmv.d fa0,ft3 +; ret + +function %f28(f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; block0: +; nearest ft3,fa0##int_tmp=a1 f_tmp=ft5 ty=f64 +; fmv.d fa0,ft3 +; ret + +function %f29(f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + return v3 +} + +; block0: +; fmadd.s fa0,fa0,fa1,fa2 +; ret + +function %f30(f64, f64, f64) -> f64 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma v0, v1, v2 + return v3 +} + +; block0: +; fmadd.d fa0,fa0,fa1,fa2 +; ret + +function %f31(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} + +; block0: +; fsgnj.s fa0,fa0,fa1 +; ret + +function %f32(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} + +; block0: +; fsgnj.d fa0,fa0,fa1 +; ret + +function %f33(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i32 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f34(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; block0: +; fcvt_to_sint.i32 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f35(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i64 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f36(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; block0: +; fcvt_to_sint.i64 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f37(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; block0: +; fcvt_to_uint.i32 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f38(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; block0: +; fcvt_to_sint.i32 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f39(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} 
+ +; block0: +; fcvt_to_uint.i64 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f40(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; block0: +; fcvt_to_sint.i64 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f41(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; fcvt.s.wu fa0,a0 +; ret + +function %f42(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; fcvt.s.w fa0,a0 +; ret + +function %f43(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; fcvt.s.lu fa0,a0 +; ret + +function %f44(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; fcvt.s.l fa0,a0 +; ret + +function %f45(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.wu fa0,a0 +; ret + +function %f46(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.w fa0,a0 +; ret + +function %f47(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.lu fa0,a0 +; ret + +function %f48(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; fcvt.d.l fa0,a0 +; ret + +function %f49(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; block0: +; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f50(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; block0: +; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f51(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; block0: +; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f52(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; block0: +; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f32 tmp=ft4 +; ret + +function %f53(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; block0: +; fcvt_to_uint_sat.i32 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f54(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; block0: +; fcvt_to_sint_sat.i32 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f55(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; block0: +; fcvt_to_uint_sat.i64 a0,fa0##in_ty=f64 tmp=ft4 +; ret + +function %f56(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; block0: +; fcvt_to_sint_sat.i64 a0,fa0##in_ty=f64 tmp=ft4 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif new file mode 100644 index 000000000000..9f884f06447c --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif @@ -0,0 +1,53 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %dynamic_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; block0: +; uext.w t3,a1 +; ld t4,0(a0) +; addi t4,t4,0 +; ugt t0,t3,t4##ty=i64 +; beq t0,zero,taken(label1),not_taken(label2) +; block1: +; add t0,a0,t3 +; ugt t3,t3,t4##ty=i64 +; li t1,0 +; selectif_spectre_guard a0,t1,t0##test=t3 +; ret +; block2: +; udf##trap_code=heap_oob + +function %static_heap_check(i64 vmctx, i32) -> i64 { + gv0 = 
vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; block0: +; uext.w t3,a1 +; lui a7,16 +; ugt t4,t3,a7##ty=i64 +; beq t4,zero,taken(label1),not_taken(label2) +; block1: +; add t4,a0,t3 +; lui a7,16 +; ugt t0,t3,a7##ty=i64 +; li t1,0 +; selectif_spectre_guard a0,t1,t4##test=t0 +; ret +; block2: +; udf##trap_code=heap_oob + diff --git a/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif new file mode 100644 index 000000000000..e5a546f7e1e1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif @@ -0,0 +1,24 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function u0:0() -> i8 system_v { + +block0: + v0 = iconst.i16 0xddcc + v1 = icmp.i16 ne v0, v0 + v2 = bint.i8 v1 + return v2 +} + +; block0: +; lui t2,14 +; addi t2,t2,3532 +; lui a2,14 +; addi a2,a2,3532 +; uext.h a5,t2 +; uext.h a7,a2 +; ne t4,a5,a7##ty=i16 +; andi a0,t4,1 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/multivalue-ret.clif b/cranelift/filetests/filetests/isa/riscv64/multivalue-ret.clif new file mode 100644 index 000000000000..d19960db925a --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/multivalue-ret.clif @@ -0,0 +1,17 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +;; Test default (non-SpiderMonkey) ABI. +function %f() -> i64, i64 { +block1: + v0 = iconst.i64 1 + v1 = iconst.i64 2 + return v0, v1 +} + +; block0: +; li a0,1 +; li a1,2 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif b/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif new file mode 100644 index 000000000000..c8d217355191 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/narrow-arithmetic.clif @@ -0,0 +1,58 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %add8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = iadd.i8 v0, v1 + return v2 +} + +; block0: +; addw a0,a0,a1 +; ret + +function %add16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = iadd.i16 v0, v1 + return v2 +} + +; block0: +; addw a0,a0,a1 +; ret + +function %add32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd.i32 v0, v1 + return v2 +} + +; block0: +; addw a0,a0,a1 +; ret + +function %add32_8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = sextend.i32 v1 + v3 = iadd.i32 v0, v2 + return v3 +} + +; block0: +; sext.b a2,a1 +; addw a0,a0,a2 +; ret + +function %add64_32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; block0: +; sext.w a2,a1 +; add a0,a0,a2 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/prologue.clif b/cranelift/filetests/filetests/isa/riscv64/prologue.clif new file mode 100644 index 000000000000..439c3bc09d15 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/prologue.clif @@ -0,0 +1,279 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(f64) -> f64 { +block0(v0: f64): + v1 = fadd.f64 v0, v0 + v2 = fadd.f64 v0, v0 + v3 = fadd.f64 v0, v0 + v4 = fadd.f64 v0, v0 + v5 = fadd.f64 v0, v0 + v6 = fadd.f64 v0, v0 + v7 = fadd.f64 v0, v0 + v8 = fadd.f64 v0, v0 + v9 = fadd.f64 v0, v0 + v10 = fadd.f64 v0, v0 + v11 = fadd.f64 v0, v0 + v12 = fadd.f64 v0, v0 + v13 = fadd.f64 v0, v0 + v14 = fadd.f64 v0, v0 + v15 = fadd.f64 v0, v0 + v16 = fadd.f64 v0, v0 + v17 = 
fadd.f64 v0, v0 + v18 = fadd.f64 v0, v0 + v19 = fadd.f64 v0, v0 + v20 = fadd.f64 v0, v0 + v21 = fadd.f64 v0, v0 + v22 = fadd.f64 v0, v0 + v23 = fadd.f64 v0, v0 + v24 = fadd.f64 v0, v0 + v25 = fadd.f64 v0, v0 + v26 = fadd.f64 v0, v0 + v27 = fadd.f64 v0, v0 + v28 = fadd.f64 v0, v0 + v29 = fadd.f64 v0, v0 + v30 = fadd.f64 v0, v0 + v31 = fadd.f64 v0, v0 + + v32 = fadd.f64 v0, v1 + v33 = fadd.f64 v2, v3 + v34 = fadd.f64 v4, v5 + v35 = fadd.f64 v6, v7 + v36 = fadd.f64 v8, v9 + v37 = fadd.f64 v10, v11 + v38 = fadd.f64 v12, v13 + v39 = fadd.f64 v14, v15 + v40 = fadd.f64 v16, v17 + v41 = fadd.f64 v18, v19 + v42 = fadd.f64 v20, v21 + v43 = fadd.f64 v22, v23 + v44 = fadd.f64 v24, v25 + v45 = fadd.f64 v26, v27 + v46 = fadd.f64 v28, v29 + v47 = fadd.f64 v30, v31 + + v48 = fadd.f64 v32, v33 + v49 = fadd.f64 v34, v35 + v50 = fadd.f64 v36, v37 + v51 = fadd.f64 v38, v39 + v52 = fadd.f64 v40, v41 + v53 = fadd.f64 v42, v43 + v54 = fadd.f64 v44, v45 + v55 = fadd.f64 v46, v47 + + v56 = fadd.f64 v48, v49 + v57 = fadd.f64 v50, v51 + v58 = fadd.f64 v52, v53 + v59 = fadd.f64 v54, v55 + + v60 = fadd.f64 v56, v57 + v61 = fadd.f64 v58, v59 + + v62 = fadd.f64 v60, v61 + + return v62 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; fsd fs0,-8(sp) +; fsd fs2,-16(sp) +; fsd fs3,-24(sp) +; fsd fs4,-32(sp) +; fsd fs5,-40(sp) +; fsd fs6,-48(sp) +; fsd fs7,-56(sp) +; fsd fs8,-64(sp) +; fsd fs9,-72(sp) +; fsd fs10,-80(sp) +; fsd fs11,-88(sp) +; add sp,-96 +; block0: +; fadd.d ft4,fa0,fa0 +; fadd.d ft5,fa0,fa0 +; fadd.d ft6,fa0,fa0 +; fadd.d ft7,fa0,fa0 +; fadd.d fa1,fa0,fa0 +; fadd.d fa2,fa0,fa0 +; fadd.d fa3,fa0,fa0 +; fadd.d fa4,fa0,fa0 +; fadd.d fa5,fa0,fa0 +; fadd.d fa6,fa0,fa0 +; fadd.d fa7,fa0,fa0 +; fadd.d ft8,fa0,fa0 +; fadd.d ft9,fa0,fa0 +; fadd.d ft10,fa0,fa0 +; fadd.d ft11,fa0,fa0 +; fadd.d ft0,fa0,fa0 +; fadd.d ft1,fa0,fa0 +; fadd.d ft2,fa0,fa0 +; fadd.d ft3,fa0,fa0 +; fadd.d fs4,fa0,fa0 +; fadd.d fs5,fa0,fa0 +; fadd.d fs6,fa0,fa0 +; fadd.d fs7,fa0,fa0 +; fadd.d fs8,fa0,fa0 +; fadd.d fs9,fa0,fa0 +; fadd.d fs10,fa0,fa0 +; fadd.d fs11,fa0,fa0 +; fadd.d fs0,fa0,fa0 +; fadd.d fs1,fa0,fa0 +; fadd.d fs2,fa0,fa0 +; fadd.d fs3,fa0,fa0 +; fadd.d ft4,fa0,ft4 +; fadd.d ft5,ft5,ft6 +; fadd.d ft6,ft7,fa1 +; fadd.d ft7,fa2,fa3 +; fadd.d fa0,fa4,fa5 +; fadd.d fa1,fa6,fa7 +; fadd.d fa2,ft8,ft9 +; fadd.d fa3,ft10,ft11 +; fadd.d fa4,ft0,ft1 +; fadd.d fa5,ft2,ft3 +; fadd.d fa6,fs4,fs5 +; fadd.d fa7,fs6,fs7 +; fadd.d ft8,fs8,fs9 +; fadd.d ft9,fs10,fs11 +; fadd.d ft10,fs0,fs1 +; fadd.d ft11,fs2,fs3 +; fadd.d ft4,ft4,ft5 +; fadd.d ft5,ft6,ft7 +; fadd.d ft6,fa0,fa1 +; fadd.d ft7,fa2,fa3 +; fadd.d fa0,fa4,fa5 +; fadd.d fa1,fa6,fa7 +; fadd.d fa2,ft8,ft9 +; fadd.d fa3,ft10,ft11 +; fadd.d ft4,ft4,ft5 +; fadd.d ft5,ft6,ft7 +; fadd.d ft6,fa0,fa1 +; fadd.d ft7,fa2,fa3 +; fadd.d ft4,ft4,ft5 +; fadd.d ft5,ft6,ft7 +; fadd.d fa0,ft4,ft5 +; add sp,+96 +; fld fs0,-8(sp) +; fld fs2,-16(sp) +; fld fs3,-24(sp) +; fld fs4,-32(sp) +; fld fs5,-40(sp) +; fld fs6,-48(sp) +; fld fs7,-56(sp) +; fld fs8,-64(sp) +; fld fs9,-72(sp) +; fld fs10,-80(sp) +; fld fs11,-88(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %f2(i64) -> i64 { +block0(v0: i64): + v1 = iadd.i64 v0, v0 + v2 = iadd.i64 v0, v1 + v3 = iadd.i64 v0, v2 + v4 = iadd.i64 v0, v3 + v5 = iadd.i64 v0, v4 + v6 = iadd.i64 v0, v5 + v7 = iadd.i64 v0, v6 + v8 = iadd.i64 v0, v7 + v9 = iadd.i64 v0, v8 + v10 = iadd.i64 v0, v9 + v11 = iadd.i64 v0, v10 + v12 = iadd.i64 v0, v11 + v13 = iadd.i64 v0, v12 + v14 = iadd.i64 v0, v13 + v15 = iadd.i64 v0, v14 + v16 = iadd.i64 v0, v15 + v17 
= iadd.i64 v0, v16 + v18 = iadd.i64 v0, v17 + + v19 = iadd.i64 v0, v1 + v20 = iadd.i64 v2, v3 + v21 = iadd.i64 v4, v5 + v22 = iadd.i64 v6, v7 + v23 = iadd.i64 v8, v9 + v24 = iadd.i64 v10, v11 + v25 = iadd.i64 v12, v13 + v26 = iadd.i64 v14, v15 + v27 = iadd.i64 v16, v17 + + v28 = iadd.i64 v18, v19 + v29 = iadd.i64 v20, v21 + v30 = iadd.i64 v22, v23 + v31 = iadd.i64 v24, v25 + v32 = iadd.i64 v26, v27 + + v33 = iadd.i64 v28, v29 + v34 = iadd.i64 v30, v31 + + v35 = iadd.i64 v32, v33 + v36 = iadd.i64 v34, v35 + + return v36 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s6,-8(sp) +; sd s7,-16(sp) +; sd s8,-24(sp) +; sd s9,-32(sp) +; sd s10,-40(sp) +; sd s11,-48(sp) +; add sp,-48 +; block0: +; add t4,a0,a0 +; add t0,a0,t4 +; add t1,a0,t0 +; add t2,a0,t1 +; add a1,a0,t2 +; add a2,a0,a1 +; add a3,a0,a2 +; add a4,a0,a3 +; add a5,a0,a4 +; add a6,a0,a5 +; add a7,a0,a6 +; add t3,a0,a7 +; add s6,a0,t3 +; add s7,a0,s6 +; add s8,a0,s7 +; add s9,a0,s8 +; add s10,a0,s9 +; add s11,a0,s10 +; add t4,a0,t4 +; add t0,t0,t1 +; add t1,t2,a1 +; add t2,a2,a3 +; add a0,a4,a5 +; add a1,a6,a7 +; add a2,t3,s6 +; add a3,s7,s8 +; add a4,s9,s10 +; add t4,s11,t4 +; add t0,t0,t1 +; add t1,t2,a0 +; add t2,a1,a2 +; add a0,a3,a4 +; add t4,t4,t0 +; add t0,t1,t2 +; add t4,a0,t4 +; add a0,t0,t4 +; add sp,+48 +; ld s6,-8(sp) +; ld s7,-16(sp) +; ld s8,-24(sp) +; ld s9,-32(sp) +; ld s10,-40(sp) +; ld s11,-48(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/reduce.clif b/cranelift/filetests/filetests/isa/riscv64/reduce.clif new file mode 100644 index 000000000000..22d00e355f3f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/reduce.clif @@ -0,0 +1,40 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %ireduce_128_64(i128) -> i64 { +block0(v0: i128): + v1 = ireduce.i64 v0 + return v1 +} + +; block0: +; ret + +function %ireduce_128_32(i128) -> i32 { +block0(v0: i128): + v1 = ireduce.i32 v0 + return v1 +} + +; block0: +; ret + +function %ireduce_128_16(i128) -> i16 { +block0(v0: i128): + v1 = ireduce.i16 v0 + return v1 +} + +; block0: +; ret + +function %ireduce_128_8(i128) -> i8 { +block0(v0: i128): + v1 = ireduce.i8 v0 + return v1 +} + +; block0: +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/reftypes.clif b/cranelift/filetests/filetests/isa/riscv64/reftypes.clif new file mode 100644 index 000000000000..4ce8c491a0c4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/reftypes.clif @@ -0,0 +1,103 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f0(r64) -> r64 { +block0(v0: r64): + return v0 +} + +; block0: +; ret + +function %f1(r64) -> b1 { +block0(v0: r64): + v1 = is_null v0 + return v1 +} + +; block0: +; is_null a0,a0 +; ret + +function %f2(r64) -> b1 { +block0(v0: r64): + v1 = is_invalid v0 + return v1 +} + +; block0: +; is_invalid a0,a0 +; ret + +function %f3() -> r64 { +block0: + v0 = null.r64 + return v0 +} + +; block0: +; li a0,0 +; ret + +function %f4(r64, r64) -> r64, r64, r64 { + fn0 = %f(r64) -> b1 + ss0 = explicit_slot 8 + +block0(v0: r64, v1: r64): + v2 = call fn0(v0) + stack_store.r64 v0, ss0 + brz v2, block1(v1, v0) + jump block2(v0, v1) + +block1(v3: r64, v4: r64): + jump block3(v3, v4) + +block2(v5: r64, v6: r64): + jump block3(v5, v6) + +block3(v7: r64, v8: r64): + v9 = stack_load.r64 ss0 + return v7, v8, v9 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s9,-8(sp) +; add sp,-48 +; block0: +; sd a0,8(nominal_sp) 
+; sd a1,16(nominal_sp) +; mv s9,a2 +; load_sym a3,%f+0 +; callind a3 +; load_addr a2,nsp+0 +; ld t1,8(nominal_sp) +; sd t1,0(a2) +; beq a0,zero,taken(label1),not_taken(label3) +; block1: +; j label2 +; block2: +; mv a1,t1 +; ld a0,16(nominal_sp) +; j label5 +; block3: +; j label4 +; block4: +; mv a0,t1 +; ld a1,16(nominal_sp) +; j label5 +; block5: +; load_addr a4,nsp+0 +; ld a4,0(a4) +; mv a2,s9 +; sd a4,0(a2) +; add sp,+48 +; ld s9,-8(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/shift-op.clif b/cranelift/filetests/filetests/isa/riscv64/shift-op.clif new file mode 100644 index 000000000000..103aac23cb79 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/shift-op.clif @@ -0,0 +1,28 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = ishl.i64 v0, v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; block0: +; slli a1,a0,3 +; add a0,a0,a1 +; ret + +function %f(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 53 + v2 = ishl.i32 v0, v1 + return v2 +} + +; block0: +; slliw a0,a0,53 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif b/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif new file mode 100644 index 000000000000..3ba647fecbfd --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif @@ -0,0 +1,451 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ROR, variable +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %i128_rotr(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotr.i128 v0, v1 + return v2 +} + +; block0: +; andi a4,a2,127 +; li a6,128 +; sub a6,a6,a4 +; srl t4,a0,a4 +; sll t1,a1,a6 +; select_reg a2,zero,t1##condition=(a4 eq zero) +; or a2,t4,a2 +; srl a5,a1,a4 +; sll a6,a0,a6 +; select_reg t3,zero,a6##condition=(a4 eq zero) +; or t0,a5,t3 +; li t2,64 +; select_reg a0,t0,a2##condition=(a4 uge t2) +; select_reg a1,a2,t0##condition=(a4 uge t2) +; ret + +function %f0(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotr.i64 v0, v1 + return v2 +} + +; block0: +; andi a1,a1,63 +; li a3,64 +; sub a3,a3,a1 +; srl a6,a0,a1 +; sll t3,a0,a3 +; select_reg t0,zero,t3##condition=(a1 eq zero) +; or a0,a6,t0 +; ret + +function %f1(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr.i32 v0, v1 + return v2 +} + +; block0: +; uext.w a2,a0 +; andi a3,a1,31 +; li a5,32 +; sub a5,a5,a3 +; srl t3,a2,a3 +; sll t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotr.i16 v0, v1 + return v2 +} + +; block0: +; uext.h a2,a0 +; andi a3,a1,15 +; li a5,16 +; sub a5,a5,a3 +; srl t3,a2,a3 +; sll t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %f3(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + return v2 +} + +; block0: +; uext.b a2,a0 +; andi a3,a1,7 +; li a5,8 +; sub a5,a5,a3 +; srl t3,a2,a3 +; sll t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %i128_rotl(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = rotl.i128 v0, v1 + return v2 +} + +; block0: +; andi a4,a2,127 +; li a6,128 +; sub a6,a6,a4 +; sll t4,a0,a4 +; srl t1,a1,a6 +; select_reg a2,zero,t1##condition=(a4 eq zero) +; or a2,t4,a2 +; sll a5,a1,a4 +; srl a6,a0,a6 +; select_reg t3,zero,a6##condition=(a4 eq zero) +; or 
t0,a5,t3 +; li t2,64 +; select_reg a0,t0,a2##condition=(a4 uge t2) +; select_reg a1,a2,t0##condition=(a4 uge t2) +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotl.i64 v0, v1 + return v2 +} + +; block0: +; andi a1,a1,63 +; li a3,64 +; sub a3,a3,a1 +; sll a6,a0,a1 +; srl t3,a0,a3 +; select_reg t0,zero,t3##condition=(a1 eq zero) +; or a0,a6,t0 +; ret + +function %f5(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl.i32 v0, v1 + return v2 +} + +; block0: +; uext.w a2,a0 +; andi a3,a1,31 +; li a5,32 +; sub a5,a5,a3 +; sll t3,a2,a3 +; srl t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %f6(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotl.i16 v0, v1 + return v2 +} + +; block0: +; uext.h a2,a0 +; andi a3,a1,15 +; li a5,16 +; sub a5,a5,a3 +; sll t3,a2,a3 +; srl t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %f7(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + return v2 +} + +; block0: +; uext.b a2,a0 +; andi a3,a1,7 +; li a5,8 +; sub a5,a5,a3 +; sll t3,a2,a3 +; srl t0,a2,a5 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a0,t3,t2 +; ret + +function %f8(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr.i64 v0, v1 + return v2 +} + +; block0: +; srl a0,a0,a1 +; ret + +function %f9(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + return v2 +} + +; block0: +; srlw a0,a0,a1 +; ret + +function %f10(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} + +; block0: +; mv a5,a1 +; uext.h a1,a0 +; andi a3,a5,15 +; srlw a0,a1,a3 +; ret + +function %f11(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} + +; block0: +; mv a5,a1 +; uext.b a1,a0 +; andi a3,a5,7 +; srlw a0,a1,a3 +; ret + +function %f12(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl.i64 v0, v1 + return v2 +} + +; block0: +; sll a0,a0,a1 +; ret + +function %f13(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + return v2 +} + +; block0: +; sllw a0,a0,a1 +; ret + +function %f14(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} + +; block0: +; andi a1,a1,15 +; sllw a0,a0,a1 +; ret + +function %f15(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} + +; block0: +; andi a1,a1,7 +; sllw a0,a0,a1 +; ret + +function %f16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr.i64 v0, v1 + return v2 +} + +; block0: +; sra a0,a0,a1 +; ret + +function %f17(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + return v2 +} + +; block0: +; sraw a0,a0,a1 +; ret + +function %f18(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} + +; block0: +; mv a5,a1 +; sext.h a1,a0 +; andi a3,a5,15 +; sra a0,a1,a3 +; ret + +function %f19(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} + +; block0: +; mv a5,a1 +; sext.b a1,a0 +; andi a3,a5,7 +; sra a0,a1,a3 +; ret + +function %f20(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotr.i64 v0, v1 + return v2 +} + +; block0: +; li a1,17 +; andi a2,a1,63 +; li a4,64 +; sub a4,a4,a2 +; srl a7,a0,a2 +; sll t4,a0,a4 +; select_reg t1,zero,t4##condition=(a2 eq zero) +; or a0,a7,t1 +; ret + +function %f21(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotl.i64 v0, v1 + return v2 +} + +; block0: +; li a1,17 +; andi a2,a1,63 +; li a4,64 +; sub a4,a4,a2 +; sll a7,a0,a2 +; srl t4,a0,a4 +; select_reg 
t1,zero,t4##condition=(a2 eq zero) +; or a0,a7,t1 +; ret + +function %f22(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = rotl.i32 v0, v1 + return v2 +} + +; block0: +; mv t4,a0 +; li a0,17 +; uext.w a2,t4 +; andi a4,a0,31 +; li a6,32 +; sub a6,a6,a4 +; sll t4,a2,a4 +; srl t1,a2,a6 +; select_reg a0,zero,t1##condition=(a4 eq zero) +; or a0,t4,a0 +; ret + +function %f23(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = rotl.i16 v0, v1 + return v2 +} + +; block0: +; mv t4,a0 +; li a0,10 +; uext.h a2,t4 +; andi a4,a0,15 +; li a6,16 +; sub a6,a6,a4 +; sll t4,a2,a4 +; srl t1,a2,a6 +; select_reg a0,zero,t1##condition=(a4 eq zero) +; or a0,t4,a0 +; ret + +function %f24(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = rotl.i8 v0, v1 + return v2 +} + +; block0: +; mv t4,a0 +; li a0,3 +; uext.b a2,t4 +; andi a4,a0,7 +; li a6,8 +; sub a6,a6,a4 +; sll t4,a2,a4 +; srl t1,a2,a6 +; select_reg a0,zero,t1##condition=(a4 eq zero) +; or a0,t4,a0 +; ret + +function %f25(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ushr.i64 v0, v1 + return v2 +} + +; block0: +; srli a0,a0,17 +; ret + +function %f26(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = sshr.i64 v0, v1 + return v2 +} + +; block0: +; srai a0,a0,17 +; ret + +function %f27(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ishl.i64 v0, v1 + return v2 +} + +; block0: +; slli a0,a0,17 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif b/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif new file mode 100644 index 000000000000..46f612db4299 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/stack-limit.clif @@ -0,0 +1,206 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %foo() { +block0: + return +} + +; block0: +; ret + +function %stack_limit_leaf_zero(i64 stack_limit) { +block0(v0: i64): + return +} + +; block0: +; ret + +function %stack_limit_gv_leaf_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 +block0(v0: i64): + return +} + +; block0: +; ret + +function %stack_limit_call_zero(i64 stack_limit) { + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; trap_ifc stk_ovf##(sp ult a0) +; block0: +; load_sym t2,%foo+0 +; callind t2 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_limit_gv_call_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; ld t6,0(a0) +; ld t6,4(t6) +; trap_ifc stk_ovf##(sp ult t6) +; block0: +; load_sym t2,%foo+0 +; callind t2 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_limit(i64 stack_limit) { + ss0 = explicit_slot 168 +block0(v0: i64): + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; andi t6,a0,176 +; trap_ifc stk_ovf##(sp ult t6) +; add sp,-176 +; block0: +; add sp,+176 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %huge_stack_limit(i64 stack_limit) { + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; trap_ifc stk_ovf##(sp ult a0) +; lui t5,98 +; addi t5,t5,2688 +; add t6,t5,a0 +; trap_ifc stk_ovf##(sp ult t6) +; lui a0,98 +; addi a0,a0,2688 +; call %Probestack +; add sp,-400000 +; block0: +; add 
sp,+400000 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %limit_preamble(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; ld t6,0(a0) +; ld t6,4(t6) +; andi t6,t6,32 +; trap_ifc stk_ovf##(sp ult t6) +; add sp,-32 +; block0: +; add sp,+32 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %limit_preamble_huge(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; ld t6,0(a0) +; ld t6,4(t6) +; trap_ifc stk_ovf##(sp ult t6) +; lui t5,98 +; addi t5,t5,2688 +; add t6,t5,t6 +; trap_ifc stk_ovf##(sp ult t6) +; lui a0,98 +; addi a0,a0,2688 +; call %Probestack +; add sp,-400000 +; block0: +; add sp,+400000 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %limit_preamble_huge_offset(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+400000 + stack_limit = gv1 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; ld t6,400000(a0) +; andi t6,t6,32 +; trap_ifc stk_ovf##(sp ult t6) +; add sp,-32 +; block0: +; add sp,+32 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/stack.clif b/cranelift/filetests/filetests/isa/riscv64/stack.clif new file mode 100644 index 000000000000..5e3aaca467e6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/stack.clif @@ -0,0 +1,630 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %stack_addr_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; load_addr a0,nsp+0 +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_addr_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; lui a0,24 +; addi a0,a0,1712 +; call %Probestack +; add sp,-100016 +; block0: +; load_addr a0,nsp+0 +; add sp,+100016 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_load_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; load_addr t2,nsp+0 +; ld a0,0(t2) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_load_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; lui a0,24 +; addi a0,a0,1712 +; call %Probestack +; add sp,-100016 +; block0: +; load_addr t2,nsp+0 +; ld a0,0(t2) +; add sp,+100016 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_store_small(i64) { +ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; load_addr t2,nsp+0 +; sd a0,0(t2) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %stack_store_big(i64) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i64): 
+ stack_store.i64 v0, ss0 + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; lui a0,24 +; addi a0,a0,1712 +; call %Probestack +; add sp,-100016 +; block0: +; load_addr t2,nsp+0 +; sd a0,0(t2) +; add sp,+100016 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %b1_spill_slot(b1) -> b1, i64 { + ss0 = explicit_slot 1000 + +block0(v0: b1): + v1 = iconst.i64 1 + v2 = iconst.i64 2 + v3 = iconst.i64 3 + v4 = iconst.i64 4 + v5 = iconst.i64 5 + v6 = iconst.i64 6 + v7 = iconst.i64 7 + v8 = iconst.i64 8 + v9 = iconst.i64 9 + v10 = iconst.i64 10 + v11 = iconst.i64 11 + v12 = iconst.i64 12 + v13 = iconst.i64 13 + v14 = iconst.i64 14 + v15 = iconst.i64 15 + v16 = iconst.i64 16 + v17 = iconst.i64 17 + v18 = iconst.i64 18 + v19 = iconst.i64 19 + v20 = iconst.i64 20 + v21 = iconst.i64 21 + v22 = iconst.i64 22 + v23 = iconst.i64 23 + v24 = iconst.i64 24 + v25 = iconst.i64 25 + v26 = iconst.i64 26 + v27 = iconst.i64 27 + v28 = iconst.i64 28 + v29 = iconst.i64 29 + v30 = iconst.i64 30 + v31 = iconst.i64 31 + v32 = iconst.i64 32 + v33 = iconst.i64 33 + v34 = iconst.i64 34 + v35 = iconst.i64 35 + v36 = iconst.i64 36 + v37 = iconst.i64 37 + v38 = iconst.i64 38 + v39 = iconst.i64 39 + v40 = iconst.i64 30 + v41 = iconst.i64 31 + v42 = iconst.i64 32 + v43 = iconst.i64 33 + v44 = iconst.i64 34 + v45 = iconst.i64 35 + v46 = iconst.i64 36 + v47 = iconst.i64 37 + v48 = iconst.i64 38 + v49 = iconst.i64 39 + v50 = iconst.i64 30 + v51 = iconst.i64 31 + v52 = iconst.i64 32 + v53 = iconst.i64 33 + v54 = iconst.i64 34 + v55 = iconst.i64 35 + v56 = iconst.i64 36 + v57 = iconst.i64 37 + v58 = iconst.i64 38 + v59 = iconst.i64 39 + v60 = iconst.i64 30 + v61 = iconst.i64 31 + v62 = iconst.i64 32 + v63 = iconst.i64 33 + v64 = iconst.i64 34 + v65 = iconst.i64 35 + v66 = iconst.i64 36 + v67 = iconst.i64 37 + v68 = iconst.i64 38 + v69 = iconst.i64 39 + + v70 = iadd.i64 v1, v2 + v71 = iadd.i64 v3, v4 + v72 = iadd.i64 v5, v6 + v73 = iadd.i64 v7, v8 + v74 = iadd.i64 v9, v10 + v75 = iadd.i64 v11, v12 + v76 = iadd.i64 v13, v14 + v77 = iadd.i64 v15, v16 + v78 = iadd.i64 v17, v18 + v79 = iadd.i64 v19, v20 + v80 = iadd.i64 v21, v22 + v81 = iadd.i64 v23, v24 + v82 = iadd.i64 v25, v26 + v83 = iadd.i64 v27, v28 + v84 = iadd.i64 v29, v30 + v85 = iadd.i64 v31, v32 + v86 = iadd.i64 v33, v34 + v87 = iadd.i64 v35, v36 + v88 = iadd.i64 v37, v38 + v89 = iadd.i64 v39, v40 + v90 = iadd.i64 v41, v42 + v91 = iadd.i64 v43, v44 + v92 = iadd.i64 v45, v46 + v93 = iadd.i64 v47, v48 + v94 = iadd.i64 v49, v50 + v95 = iadd.i64 v51, v52 + v96 = iadd.i64 v53, v54 + v97 = iadd.i64 v55, v56 + v98 = iadd.i64 v57, v58 + v99 = iadd.i64 v59, v60 + v100 = iadd.i64 v61, v62 + v101 = iadd.i64 v63, v64 + v102 = iadd.i64 v65, v66 + v103 = iadd.i64 v67, v68 + + v104 = iadd.i64 v69, v70 + v105 = iadd.i64 v71, v72 + v106 = iadd.i64 v73, v74 + v107 = iadd.i64 v75, v76 + v108 = iadd.i64 v77, v78 + v109 = iadd.i64 v79, v80 + v110 = iadd.i64 v81, v82 + v111 = iadd.i64 v83, v84 + v112 = iadd.i64 v85, v86 + v113 = iadd.i64 v87, v88 + v114 = iadd.i64 v89, v90 + v115 = iadd.i64 v91, v92 + v116 = iadd.i64 v93, v94 + v117 = iadd.i64 v95, v96 + v118 = iadd.i64 v97, v98 + v119 = iadd.i64 v99, v100 + v120 = iadd.i64 v101, v102 + + v121 = iadd.i64 v103, v104 + v122 = iadd.i64 v105, v106 + v123 = iadd.i64 v107, v108 + v124 = iadd.i64 v109, v110 + v125 = iadd.i64 v111, v112 + v126 = iadd.i64 v113, v114 + v127 = iadd.i64 v115, v116 + v128 = iadd.i64 v117, v118 + v129 = iadd.i64 v119, v120 + + v130 = iadd.i64 v121, v122 + v131 = iadd.i64 v123, v124 + v132 = 
iadd.i64 v125, v126 + v133 = iadd.i64 v127, v128 + + v134 = iadd.i64 v129, v130 + v135 = iadd.i64 v131, v132 + + v136 = iadd.i64 v133, v134 + v137 = iadd.i64 v135, v136 + + return v0, v137 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; sd s1,-8(sp) +; sd s2,-16(sp) +; sd s3,-24(sp) +; sd s4,-32(sp) +; sd s5,-40(sp) +; sd s6,-48(sp) +; sd s7,-56(sp) +; sd s8,-64(sp) +; sd s9,-72(sp) +; sd s10,-80(sp) +; sd s11,-88(sp) +; add sp,-1280 +; block0: +; sd a0,1000(nominal_sp) +; li t0,2 +; addi a1,t0,1 +; sd a1,1176(nominal_sp) +; li t0,4 +; addi a2,t0,3 +; sd a2,1168(nominal_sp) +; li t0,6 +; addi a3,t0,5 +; sd a3,1160(nominal_sp) +; li t0,8 +; addi a4,t0,7 +; sd a4,1152(nominal_sp) +; li t0,10 +; addi a5,t0,9 +; sd a5,1144(nominal_sp) +; li t0,12 +; addi a6,t0,11 +; sd a6,1136(nominal_sp) +; li t0,14 +; addi a7,t0,13 +; sd a7,1128(nominal_sp) +; li t0,16 +; addi t3,t0,15 +; sd t3,1120(nominal_sp) +; li t0,18 +; addi t4,t0,17 +; sd t4,1112(nominal_sp) +; li t0,20 +; addi t0,t0,19 +; sd t0,1104(nominal_sp) +; li t0,22 +; addi t1,t0,21 +; sd t1,1096(nominal_sp) +; li t0,24 +; addi s8,t0,23 +; sd s8,1088(nominal_sp) +; li t0,26 +; addi s9,t0,25 +; sd s9,1080(nominal_sp) +; li t0,28 +; addi s10,t0,27 +; sd s10,1072(nominal_sp) +; li t0,30 +; addi s11,t0,29 +; sd s11,1064(nominal_sp) +; li t0,32 +; addi s1,t0,31 +; sd s1,1056(nominal_sp) +; li t0,34 +; addi s2,t0,33 +; sd s2,1048(nominal_sp) +; li t0,36 +; addi s3,t0,35 +; sd s3,1040(nominal_sp) +; li t0,38 +; addi s4,t0,37 +; sd s4,1032(nominal_sp) +; li t0,30 +; addi s5,t0,39 +; sd s5,1024(nominal_sp) +; li t0,32 +; addi s6,t0,31 +; sd s6,1016(nominal_sp) +; li t0,34 +; addi s7,t0,33 +; sd s7,1008(nominal_sp) +; li t0,36 +; addi s7,t0,35 +; li t0,38 +; addi a0,t0,37 +; li t0,30 +; addi t2,t0,39 +; li t0,32 +; addi a1,t0,31 +; li t0,34 +; addi a2,t0,33 +; li t0,36 +; addi a3,t0,35 +; li t0,38 +; addi a4,t0,37 +; li t0,30 +; addi a5,t0,39 +; li t0,32 +; addi a6,t0,31 +; li t0,34 +; addi a7,t0,33 +; li t0,36 +; addi t3,t0,35 +; li t0,38 +; addi t4,t0,37 +; ld t0,1176(nominal_sp) +; addi t0,t0,39 +; ld t1,1160(nominal_sp) +; ld s4,1168(nominal_sp) +; add t1,s4,t1 +; ld s11,1144(nominal_sp) +; ld s9,1152(nominal_sp) +; add s8,s9,s11 +; ld s5,1128(nominal_sp) +; ld s3,1136(nominal_sp) +; add s9,s3,s5 +; ld s10,1112(nominal_sp) +; ld s11,1120(nominal_sp) +; add s10,s11,s10 +; ld s4,1096(nominal_sp) +; ld s2,1104(nominal_sp) +; add s11,s2,s4 +; ld s1,1080(nominal_sp) +; ld s2,1088(nominal_sp) +; add s1,s2,s1 +; ld s3,1064(nominal_sp) +; ld s2,1072(nominal_sp) +; add s2,s2,s3 +; ld s3,1048(nominal_sp) +; ld s6,1056(nominal_sp) +; add s3,s6,s3 +; ld s4,1032(nominal_sp) +; ld s5,1040(nominal_sp) +; add s4,s5,s4 +; ld s6,1016(nominal_sp) +; ld s5,1024(nominal_sp) +; add s5,s5,s6 +; ld s6,1008(nominal_sp) +; add s7,s6,s7 +; add t2,a0,t2 +; add a0,a1,a2 +; add a1,a3,a4 +; add a2,a5,a6 +; add a3,a7,t3 +; add a4,t4,t0 +; add t1,t1,s8 +; add a5,s9,s10 +; add a6,s11,s1 +; add a7,s2,s3 +; add t3,s4,s5 +; add t2,s7,t2 +; add a0,a0,a1 +; add a1,a2,a3 +; add t1,a4,t1 +; add a2,a5,a6 +; add a3,a7,t3 +; add t2,t2,a0 +; add t1,a1,t1 +; add a0,a2,a3 +; add t1,t2,t1 +; add a1,a0,t1 +; ld a0,1000(nominal_sp) +; add sp,+1280 +; ld s1,-8(sp) +; ld s2,-16(sp) +; ld s3,-24(sp) +; ld s4,-32(sp) +; ld s5,-40(sp) +; ld s6,-48(sp) +; ld s7,-56(sp) +; ld s8,-64(sp) +; ld s9,-72(sp) +; ld s10,-80(sp) +; ld s11,-88(sp) +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_store(i128) { +ss0 = explicit_slot 16 + +block0(v0: i128): + stack_store.i128 
v0, ss0 + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; mv a2,a0 +; load_addr a0,nsp+0 +; sd a2,0(a0) +; sd a1,8(a0) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_store_inst_offset(i128) { +ss0 = explicit_slot 16 +ss1 = explicit_slot 16 + +block0(v0: i128): + stack_store.i128 v0, ss1+16 + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-32 +; block0: +; mv a2,a0 +; load_addr a0,nsp+32 +; sd a2,0(a0) +; sd a1,8(a0) +; add sp,+32 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_store_big(i128) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i128): + stack_store.i128 v0, ss0 + return +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; lui a0,24 +; addi a0,a0,1712 +; call %Probestack +; add sp,-100016 +; block0: +; mv a2,a0 +; load_addr a0,nsp+0 +; sd a2,0(a0) +; sd a1,8(a0) +; add sp,+100016 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_load() -> i128 { +ss0 = explicit_slot 16 + +block0: + v0 = stack_load.i128 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-16 +; block0: +; load_addr a1,nsp+0 +; ld a0,0(a1) +; ld a1,8(a1) +; add sp,+16 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_load_inst_offset() -> i128 { +ss0 = explicit_slot 16 +ss1 = explicit_slot 16 + +block0: + v0 = stack_load.i128 ss1+16 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; add sp,-32 +; block0: +; load_addr a1,nsp+32 +; ld a0,0(a1) +; ld a1,8(a1) +; add sp,+32 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + +function %i128_stack_load_big() -> i128 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i128 ss0 + return v0 +} + +; add sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; lui a0,24 +; addi a0,a0,1712 +; call %Probestack +; add sp,-100016 +; block0: +; load_addr a1,nsp+0 +; ld a0,0(a1) +; ld a1,8(a1) +; add sp,+100016 +; ld ra,8(sp) +; ld fp,0(sp) +; add sp,+16 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/symbol-value.clif b/cranelift/filetests/filetests/isa/riscv64/symbol-value.clif new file mode 100644 index 000000000000..a1353158cde8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/symbol-value.clif @@ -0,0 +1,16 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f() -> i64 { + gv0 = symbol %my_global + +block0: + v0 = symbol_value.i64 gv0 + return v0 +} + +; block0: +; load_sym a0,%my_global+0 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/traps.clif b/cranelift/filetests/filetests/isa/riscv64/traps.clif new file mode 100644 index 000000000000..9ebe09422cca --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/traps.clif @@ -0,0 +1,36 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f() { +block0: + trap user0 +} + +; block0: +; udf##trap_code=user0 + +function %g(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = ifcmp v0, v1 + trapif eq v2, user0 + return +} + +; block0: +; li t2,42 +; eq a1,a0,t2##ty=i64 +; trap_if a1,user0 +; ret + +function %h() { +block0: + debugtrap + return +} + +; block0: +; ebreak +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/uextend-sextend.clif b/cranelift/filetests/filetests/isa/riscv64/uextend-sextend.clif new file mode 100644 index 000000000000..151bf45a6435 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/riscv64/uextend-sextend.clif @@ -0,0 +1,124 @@ +test compile precise-output +set unwind_info=false +target riscv64 + +function %f_u_8_64(i8) -> i64 { +block0(v0: i8): + v1 = uextend.i64 v0 + return v1 +} + +; block0: +; uext.b a0,a0 +; ret + +function %f_u_8_32(i8) -> i32 { +block0(v0: i8): + v1 = uextend.i32 v0 + return v1 +} + +; block0: +; uext.b a0,a0 +; ret + +function %f_u_8_16(i8) -> i16 { +block0(v0: i8): + v1 = uextend.i16 v0 + return v1 +} + +; block0: +; uext.b a0,a0 +; ret + +function %f_s_8_64(i8) -> i64 { +block0(v0: i8): + v1 = sextend.i64 v0 + return v1 +} + +; block0: +; sext.b a0,a0 +; ret + +function %f_s_8_32(i8) -> i32 { +block0(v0: i8): + v1 = sextend.i32 v0 + return v1 +} + +; block0: +; sext.b a0,a0 +; ret + +function %f_s_8_16(i8) -> i16 { +block0(v0: i8): + v1 = sextend.i16 v0 + return v1 +} + +; block0: +; sext.b a0,a0 +; ret + +function %f_u_16_64(i16) -> i64 { +block0(v0: i16): + v1 = uextend.i64 v0 + return v1 +} + +; block0: +; uext.h a0,a0 +; ret + +function %f_u_16_32(i16) -> i32 { +block0(v0: i16): + v1 = uextend.i32 v0 + return v1 +} + +; block0: +; uext.h a0,a0 +; ret + +function %f_s_16_64(i16) -> i64 { +block0(v0: i16): + v1 = sextend.i64 v0 + return v1 +} + +; block0: +; sext.h a0,a0 +; ret + +function %f_s_16_32(i16) -> i32 { +block0(v0: i16): + v1 = sextend.i32 v0 + return v1 +} + +; block0: +; sext.h a0,a0 +; ret + +function %f_u_32_64(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + return v1 +} + +; block0: +; uext.w a0,a0 +; ret + +function %f_s_32_64(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + return v1 +} + +; block0: +; sext.w a0,a0 +; ret + diff --git a/cranelift/filetests/filetests/runtests/alias.clif b/cranelift/filetests/filetests/runtests/alias.clif index 61ee5af49129..f556cbbb90d7 100644 --- a/cranelift/filetests/filetests/runtests/alias.clif +++ b/cranelift/filetests/filetests/runtests/alias.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %alias(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/arithmetic.clif b/cranelift/filetests/filetests/runtests/arithmetic.clif index 9fd5149b09cb..b0fadb4ef2bd 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 has_m function %add_i64(i64, i64) -> i64 { block0(v0: i64,v1: i64): diff --git a/cranelift/filetests/filetests/runtests/atomic-cas-subword-little.clif b/cranelift/filetests/filetests/runtests/atomic-cas-subword-little.clif index cc508ae4b8df..ae5871737823 100644 --- a/cranelift/filetests/filetests/runtests/atomic-cas-subword-little.clif +++ b/cranelift/filetests/filetests/runtests/atomic-cas-subword-little.clif @@ -3,6 +3,7 @@ target s390x target aarch64 target aarch64 has_lse target x86_64 +target riscv64 ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly diff --git a/cranelift/filetests/filetests/runtests/atomic-cas.clif b/cranelift/filetests/filetests/runtests/atomic-cas.clif index 9c0783b9bb9a..c9ce52e21cc1 100644 --- a/cranelift/filetests/filetests/runtests/atomic-cas.clif +++ b/cranelift/filetests/filetests/runtests/atomic-cas.clif @@ -2,7 +2,8 @@ test run target aarch64 target aarch64 has_lse target x86_64 -target s390x +target s390x +target riscv64 has_a ; We can't test that these instructions are right regarding 
atomicity, but we can ; test if they perform their operation correctly diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif index 2c201f902dfd..cfb56f791eca 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-little.clif @@ -4,6 +4,7 @@ target s390x has_mie2 target aarch64 target aarch64 has_lse target x86_64 +target riscv64 has_a ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif index 163a88644805..a6d16c7a626d 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif @@ -4,6 +4,7 @@ target s390x has_mie2 target aarch64 target aarch64 has_lse target x86_64 +target riscv64 ; We can't test that these instructions are right regarding atomicity, but we can ; test if they perform their operation correctly diff --git a/cranelift/filetests/filetests/runtests/bextend.clif b/cranelift/filetests/filetests/runtests/bextend.clif index 24dc92997866..d7bccf50bd23 100644 --- a/cranelift/filetests/filetests/runtests/bextend.clif +++ b/cranelift/filetests/filetests/runtests/bextend.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %bextend_b1_b8(b1) -> b8 { block0(v0: b1): diff --git a/cranelift/filetests/filetests/runtests/bint.clif b/cranelift/filetests/filetests/runtests/bint.clif index 66a0309d005d..44dc94ba4cc7 100644 --- a/cranelift/filetests/filetests/runtests/bint.clif +++ b/cranelift/filetests/filetests/runtests/bint.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %bint_b1_i8_true() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index 2e3f11427462..c72962da607b 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -1,6 +1,7 @@ test run target aarch64 target s390x +target riscv64 target s390x has_mie2 ; target x86_64 TODO: Not yet implemented on x86_64 diff --git a/cranelift/filetests/filetests/runtests/bitrev.clif b/cranelift/filetests/filetests/runtests/bitrev.clif index f0aa2194e310..2ac80ac535ce 100644 --- a/cranelift/filetests/filetests/runtests/bitrev.clif +++ b/cranelift/filetests/filetests/runtests/bitrev.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %bitrev_i8(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/bmask.clif b/cranelift/filetests/filetests/runtests/bmask.clif index fb87c0216650..9cd9da682d2b 100644 --- a/cranelift/filetests/filetests/runtests/bmask.clif +++ b/cranelift/filetests/filetests/runtests/bmask.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target s390x +target riscv64 function %bmask_b64_i64(b64) -> i64 { block0(v0: b64): diff --git a/cranelift/filetests/filetests/runtests/br.clif b/cranelift/filetests/filetests/runtests/br.clif index 8031f5735c58..2fac5b5ff7cb 100644 --- a/cranelift/filetests/filetests/runtests/br.clif +++ b/cranelift/filetests/filetests/runtests/br.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target 
x86_64 +target riscv64 function %jump() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/br_icmp.clif b/cranelift/filetests/filetests/runtests/br_icmp.clif index 0806ff1adbfb..5443833b355f 100644 --- a/cranelift/filetests/filetests/runtests/br_icmp.clif +++ b/cranelift/filetests/filetests/runtests/br_icmp.clif @@ -3,7 +3,7 @@ test run target aarch64 target s390x target x86_64 - +target riscv64 function %bricmp_eq_i64(i64, i64) -> b1 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/runtests/br_table.clif b/cranelift/filetests/filetests/runtests/br_table.clif index 4e4cbb49516e..d6b9f4bc94b4 100644 --- a/cranelift/filetests/filetests/runtests/br_table.clif +++ b/cranelift/filetests/filetests/runtests/br_table.clif @@ -4,6 +4,7 @@ target aarch64 target aarch64 use_bti target x86_64 target s390x +target riscv64 function %br_table_i32(i32) -> i32 { jt0 = jump_table [block1, block2, block2, block3] @@ -38,4 +39,4 @@ block5(v5: i32): ; run: %br_table_i32(4) == 8 ; run: %br_table_i32(5) == 9 ; run: %br_table_i32(6) == 10 -; run: %br_table_i32(-1) == 3 +; run: %br_table_i32(-1) == 3 \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/breduce.clif b/cranelift/filetests/filetests/runtests/breduce.clif index c9de6222ecab..08dfdde743a6 100644 --- a/cranelift/filetests/filetests/runtests/breduce.clif +++ b/cranelift/filetests/filetests/runtests/breduce.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %breduce_b8_b1(b8) -> b1 { block0(v0: b8): diff --git a/cranelift/filetests/filetests/runtests/ceil.clif b/cranelift/filetests/filetests/runtests/ceil.clif index 89f8abcf8f2c..2bc6998f4347 100644 --- a/cranelift/filetests/filetests/runtests/ceil.clif +++ b/cranelift/filetests/filetests/runtests/ceil.clif @@ -4,6 +4,7 @@ target x86_64 target x86_64 has_sse41=false target aarch64 target s390x +target riscv64 function %ceil_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/cls.clif b/cranelift/filetests/filetests/runtests/cls.clif index fdd937bd35ba..d87c261939aa 100644 --- a/cranelift/filetests/filetests/runtests/cls.clif +++ b/cranelift/filetests/filetests/runtests/cls.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target riscv64 target s390x ; not implemented on `x86_64` diff --git a/cranelift/filetests/filetests/runtests/clz.clif b/cranelift/filetests/filetests/runtests/clz.clif index dced407b742c..98355af698c7 100644 --- a/cranelift/filetests/filetests/runtests/clz.clif +++ b/cranelift/filetests/filetests/runtests/clz.clif @@ -4,6 +4,7 @@ target aarch64 target s390x target x86_64 target x86_64 has_lzcnt +target riscv64 function %clz_i8(i8) -> i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/const.clif b/cranelift/filetests/filetests/runtests/const.clif index 579b936eeca7..36c4143879c1 100644 --- a/cranelift/filetests/filetests/runtests/const.clif +++ b/cranelift/filetests/filetests/runtests/const.clif @@ -2,6 +2,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %i8_iconst_0() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/conversion.clif b/cranelift/filetests/filetests/runtests/conversion.clif index 011abcfc743c..50d17906ca05 100644 --- a/cranelift/filetests/filetests/runtests/conversion.clif +++ b/cranelift/filetests/filetests/runtests/conversion.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %fcvt_to_sint(f32) -> 
i32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/conversions-load-store.clif b/cranelift/filetests/filetests/runtests/conversions-load-store.clif index 38ba57c7abbb..c30aa19b0df5 100644 --- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif +++ b/cranelift/filetests/filetests/runtests/conversions-load-store.clif @@ -3,6 +3,7 @@ test run target x86_64 target s390x target aarch64 +;; target riscv64 vector type not supported. function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/runtests/ctz.clif b/cranelift/filetests/filetests/runtests/ctz.clif index 5f8f7023da04..30516386770d 100644 --- a/cranelift/filetests/filetests/runtests/ctz.clif +++ b/cranelift/filetests/filetests/runtests/ctz.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 target x86_64 has_bmi1 function %ctz_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/div-checks.clif b/cranelift/filetests/filetests/runtests/div-checks.clif index b1edb4f4c157..3a854adbad70 100644 --- a/cranelift/filetests/filetests/runtests/div-checks.clif +++ b/cranelift/filetests/filetests/runtests/div-checks.clif @@ -3,6 +3,8 @@ set avoid_div_traps=false target aarch64 target s390x target x86_64 +target riscv64 + ; Tests that the `avoid_div_traps` flag prevents a trap when {s,u}rem is called ; with INT_MIN % -1. diff --git a/cranelift/filetests/filetests/runtests/extend.clif b/cranelift/filetests/filetests/runtests/extend.clif index 4ce87b411ff2..f5b77337697d 100644 --- a/cranelift/filetests/filetests/runtests/extend.clif +++ b/cranelift/filetests/filetests/runtests/extend.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 ;;;; basic uextend diff --git a/cranelift/filetests/filetests/runtests/fabs.clif b/cranelift/filetests/filetests/runtests/fabs.clif index 4d63273efbd5..02ad82b82547 100644 --- a/cranelift/filetests/filetests/runtests/fabs.clif +++ b/cranelift/filetests/filetests/runtests/fabs.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %fabs_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/fadd.clif b/cranelift/filetests/filetests/runtests/fadd.clif index 6448615ae6e1..0d7debb7de83 100644 --- a/cranelift/filetests/filetests/runtests/fadd.clif +++ b/cranelift/filetests/filetests/runtests/fadd.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fadd_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-eq.clif b/cranelift/filetests/filetests/runtests/fcmp-eq.clif index 1bac6a35d299..01d2d512c754 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-eq.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_eq_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ge.clif b/cranelift/filetests/filetests/runtests/fcmp-ge.clif index 311698597eaf..aace5b5dc3d2 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ge.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ge.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_ge_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-gt.clif 
b/cranelift/filetests/filetests/runtests/fcmp-gt.clif index 250ebc0e0d9b..f29e0c99d598 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-gt.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-gt.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_gt_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-le.clif b/cranelift/filetests/filetests/runtests/fcmp-le.clif index 45f30594060a..f5ea4e17fde0 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-le.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-le.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_le_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-lt.clif b/cranelift/filetests/filetests/runtests/fcmp-lt.clif index c94501f30a0f..4c57a5338dfc 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-lt.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-lt.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_lt_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ne.clif b/cranelift/filetests/filetests/runtests/fcmp-ne.clif index 9aeec4f20bac..570ab86f01cd 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ne.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fcmp_ne_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-one.clif b/cranelift/filetests/filetests/runtests/fcmp-one.clif index cf5c48d60039..8c48d5dfc33b 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-one.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-one.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_one_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ord.clif b/cranelift/filetests/filetests/runtests/fcmp-ord.clif index 78aef77cad17..514eadaf2927 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ord.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ord.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_ord_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ueq.clif b/cranelift/filetests/filetests/runtests/fcmp-ueq.clif index d6f65885ae4c..f20aae820229 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ueq.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ueq.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_ueq_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-uge.clif b/cranelift/filetests/filetests/runtests/fcmp-uge.clif index 8e699321e2b9..d5d6e7398ec7 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-uge.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-uge.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_uge_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ugt.clif b/cranelift/filetests/filetests/runtests/fcmp-ugt.clif index 0afcd5672540..af3b09dec883 100644 --- 
a/cranelift/filetests/filetests/runtests/fcmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ugt.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_ugt_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ule.clif b/cranelift/filetests/filetests/runtests/fcmp-ule.clif index 91b4dd90cae1..9b84f0670255 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ule.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_ule_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-ult.clif b/cranelift/filetests/filetests/runtests/fcmp-ult.clif index de277c7cd129..8982e235d9f3 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-ult.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-ult.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target s390x +target riscv64 function %fcmp_ult_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcmp-uno.clif b/cranelift/filetests/filetests/runtests/fcmp-uno.clif index b698b61047b5..0fc354df259e 100644 --- a/cranelift/filetests/filetests/runtests/fcmp-uno.clif +++ b/cranelift/filetests/filetests/runtests/fcmp-uno.clif @@ -2,6 +2,8 @@ test interpret test run target x86_64 target s390x +target riscv64 + function %fcmp_uno_f32(f32, f32) -> b1 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fcopysign.clif b/cranelift/filetests/filetests/runtests/fcopysign.clif index 281143783682..0ba6f313c9d0 100644 --- a/cranelift/filetests/filetests/runtests/fcopysign.clif +++ b/cranelift/filetests/filetests/runtests/fcopysign.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %fcopysign_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fdiv.clif b/cranelift/filetests/filetests/runtests/fdiv.clif index 681f9b4bdbe1..325648e3a404 100644 --- a/cranelift/filetests/filetests/runtests/fdiv.clif +++ b/cranelift/filetests/filetests/runtests/fdiv.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fdiv_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/floor.clif b/cranelift/filetests/filetests/runtests/floor.clif index cbb67aad4f81..ff3ffe1789df 100644 --- a/cranelift/filetests/filetests/runtests/floor.clif +++ b/cranelift/filetests/filetests/runtests/floor.clif @@ -4,6 +4,7 @@ target x86_64 target x86_64 has_sse41=false target aarch64 target s390x +target riscv64 function %floor_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/fma.clif b/cranelift/filetests/filetests/runtests/fma.clif index 22f8b8bd0634..f47aa0b11500 100644 --- a/cranelift/filetests/filetests/runtests/fma.clif +++ b/cranelift/filetests/filetests/runtests/fma.clif @@ -4,6 +4,7 @@ target aarch64 target s390x target x86_64 has_avx has_fma target x86_64 has_avx=false has_fma=false +target riscv64 function %fma_f32(f32, f32, f32) -> f32 { block0(v0: f32, v1: f32, v2: f32): @@ -148,4 +149,4 @@ block0(v0: f32, v1: f32, v2: f32): v4 = fma v0, v1, v3 return v4 } -; run: %fma_load_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 +; run: %fma_load_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 \ No newline at end of file diff --git 
a/cranelift/filetests/filetests/runtests/fmax-pseudo.clif b/cranelift/filetests/filetests/runtests/fmax-pseudo.clif index f5bf2a002ad1..274992afd7f8 100644 --- a/cranelift/filetests/filetests/runtests/fmax-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmax-pseudo.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target aarch64 +target riscv64 ; target s390x FIXME: This currently fails under qemu due to a qemu bug function %fmax_p_f32(f32, f32) -> f32 { diff --git a/cranelift/filetests/filetests/runtests/fmax.clif b/cranelift/filetests/filetests/runtests/fmax.clif index 10d72e8f478d..31de6e052210 100644 --- a/cranelift/filetests/filetests/runtests/fmax.clif +++ b/cranelift/filetests/filetests/runtests/fmax.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fmax_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fmin-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-pseudo.clif index cb4857d8daba..fb9b6e8402eb 100644 --- a/cranelift/filetests/filetests/runtests/fmin-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmin-pseudo.clif @@ -2,6 +2,7 @@ test interpret test run target x86_64 target aarch64 +target riscv64 ; target s390x FIXME: This currently fails under qemu due to a qemu bug function %fmin_p_f32(f32, f32) -> f32 { diff --git a/cranelift/filetests/filetests/runtests/fmin.clif b/cranelift/filetests/filetests/runtests/fmin.clif index 9f436f54586b..9693f632d45a 100644 --- a/cranelift/filetests/filetests/runtests/fmin.clif +++ b/cranelift/filetests/filetests/runtests/fmin.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fmin_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fmul.clif b/cranelift/filetests/filetests/runtests/fmul.clif index d47703201056..c7dede8759ad 100644 --- a/cranelift/filetests/filetests/runtests/fmul.clif +++ b/cranelift/filetests/filetests/runtests/fmul.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fmul_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/fneg.clif b/cranelift/filetests/filetests/runtests/fneg.clif index bfe63f924b0c..24c91a3b4da4 100644 --- a/cranelift/filetests/filetests/runtests/fneg.clif +++ b/cranelift/filetests/filetests/runtests/fneg.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %fneg_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/fsub.clif b/cranelift/filetests/filetests/runtests/fsub.clif index 8976af7a8553..0d719e873f8c 100644 --- a/cranelift/filetests/filetests/runtests/fsub.clif +++ b/cranelift/filetests/filetests/runtests/fsub.clif @@ -3,6 +3,7 @@ test run target x86_64 target aarch64 target s390x +target riscv64 function %fsub_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/global_value.clif b/cranelift/filetests/filetests/runtests/global_value.clif index 59c17c1e2c83..e9514b1d6bd2 100644 --- a/cranelift/filetests/filetests/runtests/global_value.clif +++ b/cranelift/filetests/filetests/runtests/global_value.clif @@ -3,6 +3,7 @@ test run target x86_64 target s390x target aarch64 +target riscv64 ; Store a value in the heap using `heap_addr` and load it using `global_value` function %store_load(i64 vmctx, i64, i32) -> i32 { diff --git 
a/cranelift/filetests/filetests/runtests/heap.clif b/cranelift/filetests/filetests/runtests/heap.clif index 9b42070eb400..9b283db70271 100644 --- a/cranelift/filetests/filetests/runtests/heap.clif +++ b/cranelift/filetests/filetests/runtests/heap.clif @@ -3,7 +3,7 @@ test run target x86_64 target s390x target aarch64 - +target riscv64 function %static_heap_i64(i64 vmctx, i64, i32) -> i32 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif index 6898b3eb7192..d45890941db9 100644 --- a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %add_i128(i128, i128) -> i128 { block0(v0: i128,v1: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bandnot.clif b/cranelift/filetests/filetests/runtests/i128-bandnot.clif index 567c00f865cf..f037684e3471 100644 --- a/cranelift/filetests/filetests/runtests/i128-bandnot.clif +++ b/cranelift/filetests/filetests/runtests/i128-bandnot.clif @@ -1,5 +1,6 @@ test run target aarch64 +target riscv64 target s390x function %band_not_i128(i128, i128) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/i128-bextend.clif b/cranelift/filetests/filetests/runtests/i128-bextend.clif index 1d9c9e2a7f91..c49ac91578be 100644 --- a/cranelift/filetests/filetests/runtests/i128-bextend.clif +++ b/cranelift/filetests/filetests/runtests/i128-bextend.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target s390x +target riscv64 function %bextend_b1_b128(b1) -> b128 { block0(v0: b1): diff --git a/cranelift/filetests/filetests/runtests/i128-bint.clif b/cranelift/filetests/filetests/runtests/i128-bint.clif index bc59e993eae5..d16a2822aefd 100644 --- a/cranelift/filetests/filetests/runtests/i128-bint.clif +++ b/cranelift/filetests/filetests/runtests/i128-bint.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %bint_b1_i128_true() -> i128 { block0: diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif index abcd2751261f..533fdce315a0 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif @@ -3,6 +3,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %ctz_i128(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bitops.clif b/cranelift/filetests/filetests/runtests/i128-bitops.clif index 6bb2c0b847c0..a85fc39de9f9 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops.clif @@ -3,6 +3,8 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 + function %bnot_i128(i128) -> i128 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index 55616467442e..ed07bc259c88 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -3,6 +3,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %reverse_bits_zero() -> b1 { 
block0: diff --git a/cranelift/filetests/filetests/runtests/i128-bmask.clif b/cranelift/filetests/filetests/runtests/i128-bmask.clif index df51fccaab31..37d03426e067 100644 --- a/cranelift/filetests/filetests/runtests/i128-bmask.clif +++ b/cranelift/filetests/filetests/runtests/i128-bmask.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target riscv64 target s390x function %bmask_b128_i128(b128) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/i128-bornot.clif b/cranelift/filetests/filetests/runtests/i128-bornot.clif index bfc6f7962fd2..5489c53211a6 100644 --- a/cranelift/filetests/filetests/runtests/i128-bornot.clif +++ b/cranelift/filetests/filetests/runtests/i128-bornot.clif @@ -1,5 +1,6 @@ test run target aarch64 +target riscv64 target s390x function %bor_not_i128(i128, i128) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index 0434313f939a..098cf581465a 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -3,7 +3,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 - +target riscv64 function %i128_brz(i128) -> b1 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-breduce.clif b/cranelift/filetests/filetests/runtests/i128-breduce.clif index 93efa6c7a66e..de33346cb11c 100644 --- a/cranelift/filetests/filetests/runtests/i128-breduce.clif +++ b/cranelift/filetests/filetests/runtests/i128-breduce.clif @@ -1,4 +1,5 @@ test interpret +target riscv64 function %breduce_b128_b1(b128) -> b1 { block0(v0: b128): diff --git a/cranelift/filetests/filetests/runtests/i128-bricmp.clif b/cranelift/filetests/filetests/runtests/i128-bricmp.clif index 0cf19c4196d9..14c58e90dd19 100644 --- a/cranelift/filetests/filetests/runtests/i128-bricmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-bricmp.clif @@ -1,5 +1,6 @@ test run target aarch64 +target riscv64 target s390x function %i128_bricmp_eq(i128, i128) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/i128-bxornot.clif b/cranelift/filetests/filetests/runtests/i128-bxornot.clif index ee48c3a6e50b..d5678f3467cd 100644 --- a/cranelift/filetests/filetests/runtests/i128-bxornot.clif +++ b/cranelift/filetests/filetests/runtests/i128-bxornot.clif @@ -1,5 +1,6 @@ test run target aarch64 +target riscv64 target s390x function %bxor_not_i128(i128, i128) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/i128-cls.clif b/cranelift/filetests/filetests/runtests/i128-cls.clif index 90c1c901f990..cd9deac1029a 100644 --- a/cranelift/filetests/filetests/runtests/i128-cls.clif +++ b/cranelift/filetests/filetests/runtests/i128-cls.clif @@ -1,5 +1,6 @@ test run target aarch64 +target riscv64 target s390x function %cls_i128(i128) -> i128 { diff --git a/cranelift/filetests/filetests/runtests/i128-concat-split.clif b/cranelift/filetests/filetests/runtests/i128-concat-split.clif index 9c28faa5de46..ae39c82fe153 100644 --- a/cranelift/filetests/filetests/runtests/i128-concat-split.clif +++ b/cranelift/filetests/filetests/runtests/i128-concat-split.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %iconcat_isplit(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): @@ -15,3 +16,4 @@ block0(v0: i64, v1: i64): ; run: %iconcat_isplit(0xFFFFFFFF_FFFFFFFF, 0) == [0xFFFFFFFF_FFFFFFFF, 0] ; run: %iconcat_isplit(0, 0xFFFFFFFF_FFFFFFFF) == [0, 0xFFFFFFFF_FFFFFFFF] ; run: 
%iconcat_isplit(0x01010101_01010101, 0x02020202_02020202) == [0x01010101_01010101, 0x02020202_02020202] + diff --git a/cranelift/filetests/filetests/runtests/i128-const.clif b/cranelift/filetests/filetests/runtests/i128-const.clif index 2a7ba7a2c464..ee7392bfe2e3 100644 --- a/cranelift/filetests/filetests/runtests/i128-const.clif +++ b/cranelift/filetests/filetests/runtests/i128-const.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %i128_const_0() -> i128 { block0: diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 1b2b543fe965..43ddc88d6d84 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/runtests/i128-icmp.clif b/cranelift/filetests/filetests/runtests/i128-icmp.clif index 2c48f849851d..8b8c679d68d1 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %icmp_eq_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-ireduce.clif b/cranelift/filetests/filetests/runtests/i128-ireduce.clif index 5f38e5ea7eb1..6c8b47f43873 100644 --- a/cranelift/filetests/filetests/runtests/i128-ireduce.clif +++ b/cranelift/filetests/filetests/runtests/i128-ireduce.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %ireduce_128_64(i128) -> i64 { block0(v0: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-load-store.clif b/cranelift/filetests/filetests/runtests/i128-load-store.clif index 9663ead06709..70607485eef6 100644 --- a/cranelift/filetests/filetests/runtests/i128-load-store.clif +++ b/cranelift/filetests/filetests/runtests/i128-load-store.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true set enable_probestack=false target x86_64 target aarch64 +target riscv64 target s390x function %i128_stack_store_load(i128) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index 9fa34889885b..ba41fe9bc680 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -4,6 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %rotl(i128, i8) -> i128 { block0(v0: i128, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/i128-select.clif b/cranelift/filetests/filetests/runtests/i128-select.clif index 7355b28a1814..4a049f03beb5 100644 --- a/cranelift/filetests/filetests/runtests/i128-select.clif +++ b/cranelift/filetests/filetests/runtests/i128-select.clif @@ -3,6 +3,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 +target riscv64 function %i128_select(b1, i128, i128) -> i128 { block0(v0: b1, v1: i128, v2: i128): diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index ec0247a775c2..feba9faffc1b 100644 --- 
a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -4,7 +4,7 @@ set enable_llvm_abi_extensions=true target aarch64 target s390x target x86_64 - +target riscv64 function %ishl_i128_i128(i128, i8) -> i128 { block0(v0: i128, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif index f5552c30ec3b..3048f02bb02d 100644 --- a/cranelift/filetests/filetests/runtests/iabs.clif +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target s390x +target riscv64 ; x86_64 only supports vector iabs function %iabs_i8(i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/icmp-eq-imm.clif b/cranelift/filetests/filetests/runtests/icmp-eq-imm.clif index 35dc4d481d8d..07edcd03c9cd 100644 --- a/cranelift/filetests/filetests/runtests/icmp-eq-imm.clif +++ b/cranelift/filetests/filetests/runtests/icmp-eq-imm.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %icmp_imm_eq_i8(i8) -> b1 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-eq.clif b/cranelift/filetests/filetests/runtests/icmp-eq.clif index 4ad04e4c803e..1eb3fc36625c 100644 --- a/cranelift/filetests/filetests/runtests/icmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/icmp-eq.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x function %icmp_eq_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-ne.clif b/cranelift/filetests/filetests/runtests/icmp-ne.clif index f84ce72a8c89..fa643a2371b4 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ne.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x function %icmp_ne_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-sge.clif b/cranelift/filetests/filetests/runtests/icmp-sge.clif index 246a7dd1d0cc..cb8eb2a4f940 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sge.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x diff --git a/cranelift/filetests/filetests/runtests/icmp-sgt.clif b/cranelift/filetests/filetests/runtests/icmp-sgt.clif index e5002bc5e6b6..4c39f598d901 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sgt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sgt.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x diff --git a/cranelift/filetests/filetests/runtests/icmp-sle.clif b/cranelift/filetests/filetests/runtests/icmp-sle.clif index 7f2a9df3ceff..706d815d2791 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sle.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sle.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x diff --git a/cranelift/filetests/filetests/runtests/icmp-slt.clif b/cranelift/filetests/filetests/runtests/icmp-slt.clif index 949dfa1eb9c8..29c5d5ddc4c1 100644 --- a/cranelift/filetests/filetests/runtests/icmp-slt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-slt.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x function %icmp_slt_i8(i8, i8) -> b1 { diff --git 
a/cranelift/filetests/filetests/runtests/icmp-uge.clif b/cranelift/filetests/filetests/runtests/icmp-uge.clif index 51dc34ae9ab1..bb59e0f60454 100644 --- a/cranelift/filetests/filetests/runtests/icmp-uge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-uge.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x function %icmp_uge_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-ugt.clif b/cranelift/filetests/filetests/runtests/icmp-ugt.clif index 76d67e0cbacb..2acad5013da8 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ugt.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %icmp_ugt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ule.clif b/cranelift/filetests/filetests/runtests/icmp-ule.clif index 11b94a9bb0ed..8bf76573dcde 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ule.clif @@ -2,6 +2,7 @@ test interpret test run target aarch64 target x86_64 +target riscv64 target s390x function %icmp_ule_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp.clif b/cranelift/filetests/filetests/runtests/icmp.clif index bf80dc74f6c4..37ed9f61fbd6 100644 --- a/cranelift/filetests/filetests/runtests/icmp.clif +++ b/cranelift/filetests/filetests/runtests/icmp.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 ; This test is also a regression test for aarch64. ; We were not correctly handling the fact that the rhs constant value diff --git a/cranelift/filetests/filetests/runtests/integer-minmax.clif b/cranelift/filetests/filetests/runtests/integer-minmax.clif index 423ce343356d..ef27ec457c2b 100644 --- a/cranelift/filetests/filetests/runtests/integer-minmax.clif +++ b/cranelift/filetests/filetests/runtests/integer-minmax.clif @@ -3,6 +3,8 @@ test run ; target aarch64 ; target s390x target x86_64 +target riscv64 + ; sort three signed i8s with imin and imax only function %isort3(i8, i8, i8) -> i8, i8, i8 { diff --git a/cranelift/filetests/filetests/runtests/ireduce.clif b/cranelift/filetests/filetests/runtests/ireduce.clif index b103cbb5b877..8e0abd241e0f 100644 --- a/cranelift/filetests/filetests/runtests/ireduce.clif +++ b/cranelift/filetests/filetests/runtests/ireduce.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %ireduce_i16_i8(i16) -> i8 { block0(v0: i16): diff --git a/cranelift/filetests/filetests/runtests/load-op-store.clif b/cranelift/filetests/filetests/runtests/load-op-store.clif index ebf692b447d9..0d7ba86c4882 100644 --- a/cranelift/filetests/filetests/runtests/load-op-store.clif +++ b/cranelift/filetests/filetests/runtests/load-op-store.clif @@ -2,6 +2,8 @@ test run target x86_64 target s390x target aarch64 +target riscv64 + function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/runtests/nearest.clif b/cranelift/filetests/filetests/runtests/nearest.clif index cd2397e9adba..d363d8730a63 100644 --- a/cranelift/filetests/filetests/runtests/nearest.clif +++ b/cranelift/filetests/filetests/runtests/nearest.clif @@ -4,6 +4,7 @@ target x86_64 target x86_64 has_sse41=false target aarch64 target s390x +target riscv64 function %nearest_f32(f32) -> f32 { block0(v0: f32): diff --git 
a/cranelift/filetests/filetests/runtests/popcnt-interpret.clif b/cranelift/filetests/filetests/runtests/popcnt-interpret.clif index ccca4f10d2b3..55096397e89a 100644 --- a/cranelift/filetests/filetests/runtests/popcnt-interpret.clif +++ b/cranelift/filetests/filetests/runtests/popcnt-interpret.clif @@ -22,3 +22,11 @@ block0(v0: i64x2): } ; run: %popcnt_i64x2([1 0x4000000000000000]) == [1 1] ; run: %popcnt_i64x2([0xffffffffffffffff 0]) == [64 0] + + +function %popcnt_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = popcnt v0 + return v1 +} +; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0] diff --git a/cranelift/filetests/filetests/runtests/popcnt.clif b/cranelift/filetests/filetests/runtests/popcnt.clif index 560031d4de6d..be7b6717b6d7 100644 --- a/cranelift/filetests/filetests/runtests/popcnt.clif +++ b/cranelift/filetests/filetests/runtests/popcnt.clif @@ -4,6 +4,7 @@ target aarch64 target s390x target x86_64 target x86_64 has_popcnt +target riscv64 function %popcnt_i8(i8) -> i8 { block0(v0: i8): @@ -93,9 +94,3 @@ block0(v0: i64): ; run: %inv_popcnt_i64(-1) == 0 ; run: %inv_popcnt_i64(0) == 64 -function %popcnt_i8x16(i8x16) -> i8x16 { -block0(v0: i8x16): - v1 = popcnt v0 - return v1 -} -; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0] diff --git a/cranelift/filetests/filetests/runtests/rotl.clif b/cranelift/filetests/filetests/runtests/rotl.clif index cdf8fde2f480..0d24c19fcf3c 100644 --- a/cranelift/filetests/filetests/runtests/rotl.clif +++ b/cranelift/filetests/filetests/runtests/rotl.clif @@ -3,7 +3,7 @@ test run target aarch64 target x86_64 target s390x - +target riscv64 function %rotl_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/runtests/rotr.clif b/cranelift/filetests/filetests/runtests/rotr.clif index 5c0236f45701..2f33b4aa4612 100644 --- a/cranelift/filetests/filetests/runtests/rotr.clif +++ b/cranelift/filetests/filetests/runtests/rotr.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %rotr_i64_i64(i64, i64) -> i64 { diff --git a/cranelift/filetests/filetests/runtests/select.clif b/cranelift/filetests/filetests/runtests/select.clif index e93f082f6569..46cdb79abb2e 100644 --- a/cranelift/filetests/filetests/runtests/select.clif +++ b/cranelift/filetests/filetests/runtests/select.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 function %select_eq_f32(f32, f32) -> i32 { block0(v0: f32, v1: f32): diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index 224ac500d1bb..7e06d6af5333 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -3,7 +3,7 @@ test run target aarch64 target x86_64 target s390x - +target riscv64 function %ishl_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif b/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif index 031602552e80..128089ca3cf2 100644 --- a/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif +++ b/cranelift/filetests/filetests/runtests/smulhi-aarch64.clif @@ -1,6 +1,7 @@ test interpret test run target aarch64 +target riscv64 target s390x ; x86_64 backend only supports `i16`, `i32`, and `i64` types. 
diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index 979ee2588e21..6f1e71e7aa9a 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -4,6 +4,8 @@ target aarch64 target s390x set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41 +target riscv64 + function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): diff --git a/cranelift/filetests/filetests/runtests/spill-reload.clif b/cranelift/filetests/filetests/runtests/spill-reload.clif index f2c6bd0fe67f..af5b687d46df 100644 --- a/cranelift/filetests/filetests/runtests/spill-reload.clif +++ b/cranelift/filetests/filetests/runtests/spill-reload.clif @@ -2,6 +2,7 @@ test run target s390x target aarch64 target x86_64 +target riscv64 function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 { block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32): diff --git a/cranelift/filetests/filetests/runtests/sqrt.clif b/cranelift/filetests/filetests/runtests/sqrt.clif index e41d706bd12e..18fa96857b48 100644 --- a/cranelift/filetests/filetests/runtests/sqrt.clif +++ b/cranelift/filetests/filetests/runtests/sqrt.clif @@ -3,6 +3,7 @@ test run target aarch64 target x86_64 target s390x +target riscv64 function %sqrt_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/srem.clif b/cranelift/filetests/filetests/runtests/srem.clif index 102f4c6b2dbc..ec6d3b76d9b5 100644 --- a/cranelift/filetests/filetests/runtests/srem.clif +++ b/cranelift/filetests/filetests/runtests/srem.clif @@ -1,13 +1,11 @@ test interpret test run -target aarch64 -target s390x -target x86_64 ; Test these inputs without div traps, it shouldn't affect normal inputs set avoid_div_traps target aarch64 target s390x target x86_64 +target riscv64 function %srem_i64(i64, i64) -> i64 { block0(v0: i64,v1: i64): diff --git a/cranelift/filetests/filetests/runtests/stack-addr-64.clif b/cranelift/filetests/filetests/runtests/stack-addr-64.clif index 5dd452702483..3d8e26144aef 100644 --- a/cranelift/filetests/filetests/runtests/stack-addr-64.clif +++ b/cranelift/filetests/filetests/runtests/stack-addr-64.clif @@ -3,7 +3,7 @@ test run target x86_64 target s390x target aarch64 - +target riscv64 function %stack_addr_iadd(i64) -> b1 { ss0 = explicit_slot 16 diff --git a/cranelift/filetests/filetests/runtests/stack.clif b/cranelift/filetests/filetests/runtests/stack.clif index 698dcd681c32..54dba258e072 100644 --- a/cranelift/filetests/filetests/runtests/stack.clif +++ b/cranelift/filetests/filetests/runtests/stack.clif @@ -5,6 +5,7 @@ set enable_probestack=false target x86_64 target s390x target aarch64 +target riscv64 function %stack_simple(i64) -> i64 { ss0 = explicit_slot 8 diff --git a/cranelift/filetests/filetests/runtests/table_addr.clif b/cranelift/filetests/filetests/runtests/table_addr.clif index e20db1c6ea54..f77d356e367d 100644 --- a/cranelift/filetests/filetests/runtests/table_addr.clif +++ b/cranelift/filetests/filetests/runtests/table_addr.clif @@ -3,6 +3,7 @@ test run target x86_64 target s390x target aarch64 +target riscv64 function %set_get_i64(i64 vmctx, i64, i64) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/runtests/trunc.clif b/cranelift/filetests/filetests/runtests/trunc.clif index 
0de97280fb63..eaaf276149a0 100644 --- a/cranelift/filetests/filetests/runtests/trunc.clif +++ b/cranelift/filetests/filetests/runtests/trunc.clif @@ -4,6 +4,7 @@ target x86_64 target x86_64 has_sse41=false target aarch64 target s390x +target riscv64 function %trunc_f32(f32) -> f32 { block0(v0: f32): diff --git a/cranelift/filetests/filetests/runtests/umulhi.clif b/cranelift/filetests/filetests/runtests/umulhi.clif index 67bbf616e044..6e41a0794eb6 100644 --- a/cranelift/filetests/filetests/runtests/umulhi.clif +++ b/cranelift/filetests/filetests/runtests/umulhi.clif @@ -4,6 +4,7 @@ target aarch64 set enable_simd target x86_64 has_sse3 has_ssse3 has_sse41 target s390x +target riscv64 function %umulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): diff --git a/cranelift/filetests/filetests/runtests/urem.clif b/cranelift/filetests/filetests/runtests/urem.clif index 6dd867215c22..f0b6bb067317 100644 --- a/cranelift/filetests/filetests/runtests/urem.clif +++ b/cranelift/filetests/filetests/runtests/urem.clif @@ -3,6 +3,7 @@ test run target aarch64 target s390x target x86_64 +target riscv64 ; Test these inputs without div traps, it shouldn't affect normal inputs set avoid_div_traps target aarch64 diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 47f2a4a2ab78..3b03470c0049 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -16,6 +16,7 @@ use cranelift_reader::TestCommand; use cranelift_reader::{parse_run_command, TestFile}; use log::{info, trace}; use std::borrow::Cow; +use target_lexicon::Architecture; struct TestRun; @@ -46,6 +47,30 @@ fn build_host_isa( builder.finish(flags).unwrap() } +fn is_riscv64_compatible( + host: target_lexicon::Riscv64Architecture, + req: target_lexicon::Riscv64Architecture, +) -> bool { + match host { + // Riscv64gc is short for RV64IMAFDCZicsr_Zifencei. + // So can run them all. + target_lexicon::Riscv64Architecture::Riscv64gc => true, + // Riscv64imac can run when req is not Riscv64gc. + target_lexicon::Riscv64Architecture::Riscv64imac + if req != target_lexicon::Riscv64Architecture::Riscv64gc => + { + true + } + // Riscv64 is just basic extension. + target_lexicon::Riscv64Architecture::Riscv64 + if req == target_lexicon::Riscv64Architecture::Riscv64 => + { + true + } + _ => false, + } +} + /// Checks if the host's ISA is compatible with the one requested by the test. fn is_isa_compatible( file_path: &str, @@ -57,11 +82,17 @@ fn is_isa_compatible( // since we won't be able to natively execute machine code. 
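+ // The one exception is riscv64: a host whose extension set is a superset of the requested one (see `is_riscv64_compatible` above) can still run the test natively.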
let host_arch = host.triple().architecture; let requested_arch = requested.triple().architecture; - if host_arch != requested_arch { - return Err(format!( - "skipped {}: host can't run {:?} programs", - file_path, requested_arch - )); + + match (host_arch, requested_arch) { + (host, requested) if host == requested => {} + (Architecture::Riscv64(host), Architecture::Riscv64(req)) + if is_riscv64_compatible(host, req) => {} + _ => { + return Err(format!( + "skipped {}: host can't run {:?} programs", + file_path, requested_arch + )) + } } // We need to check that the requested ISA does not have any flags that diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml index ffed0efa3d80..b0f1bdc3fdb9 100644 --- a/cranelift/native/Cargo.toml +++ b/cranelift/native/Cargo.toml @@ -24,3 +24,4 @@ core = ["cranelift-codegen/core"] [badges] maintenance = { status = "experimental" } + diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index e73f583d7b38..3ab22d67dafb 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -72,6 +72,7 @@ impl ObjectBuilder { target_lexicon::Architecture::X86_64 => object::Architecture::X86_64, target_lexicon::Architecture::Arm(_) => object::Architecture::Arm, target_lexicon::Architecture::Aarch64(_) => object::Architecture::Aarch64, + target_lexicon::Architecture::Riscv64(_) => object::Architecture::Riscv64, target_lexicon::Architecture::S390x => object::Architecture::S390x, architecture => { return Err(ModuleError::Backend(anyhow!( diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index f4f6aa72d4d2..52635cbf04e6 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -240,6 +240,7 @@ pub trait Compiler: Send + Sync { Arm(_) => Architecture::Arm, Aarch64(_) => Architecture::Aarch64, S390x => Architecture::S390x, + Riscv64(_) => Architecture::Riscv64, architecture => { anyhow::bail!("target architecture {:?} is unsupported", architecture,); } diff --git a/crates/fiber/src/unix.rs b/crates/fiber/src/unix.rs index 571ed1be8f19..6739aca66bf4 100644 --- a/crates/fiber/src/unix.rs +++ b/crates/fiber/src/unix.rs @@ -189,7 +189,9 @@ cfg_if::cfg_if! { } else if #[cfg(target_arch = "s390x")] { // currently `global_asm!` isn't stable on s390x so this is an external // assembler file built with the `build.rs`. - } else { + } else if #[cfg(target_arch = "riscv64")] { + mod riscv64; + } else { compile_error!("fibers are not supported on this CPU architecture"); } } diff --git a/crates/fiber/src/unix/riscv64.rs b/crates/fiber/src/unix/riscv64.rs new file mode 100644 index 000000000000..6c3c71714f32 --- /dev/null +++ b/crates/fiber/src/unix/riscv64.rs @@ -0,0 +1,157 @@ +// A WORD OF CAUTION +// +// This entire file basically needs to be kept in sync with itself. It's not +// really possible to modify just one bit of this file without understanding +// all the other bits. Documentation tries to reference various bits here and +// there but try to make sure to read over everything before tweaking things! + +use wasmtime_asm_macros::asm_func; + +// fn(top_of_stack(a0): *mut u8) +asm_func!( + "wasmtime_fiber_switch", + " + // See https://github.com/rust-lang/rust/issues/80608. + .attribute arch, \"rv64gc\" + + // We're switching to arbitrary code somewhere else, so pessimistically + // assume that all callee-saved registers are clobbered. This means we need + // to save/restore all of them.
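+ // Under the standard RISC-V calling convention that set is fp/s0-s11 and fs0-fs11, plus ra for our return address: 25 doublewords in all, rounded up below to 0xd0 bytes so that sp stays 16-byte aligned.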
+ // + // Note that this order for saving is important since we use CFI directives + // below to point to where all the saved registers are. + sd ra,-0x8(sp) + sd fp,-0x10(sp) + sd s1,-0x18(sp) + sd s2,-0x20(sp) + sd s3,-0x28(sp) + sd s4,-0x30(sp) + sd s5,-0x38(sp) + sd s6,-0x40(sp) + sd s7,-0x48(sp) + sd s8,-0x50(sp) + sd s9,-0x58(sp) + sd s10,-0x60(sp) + sd s11,-0x68(sp) + fsd fs0,-0x70(sp) + fsd fs1,-0x78(sp) + fsd fs2,-0x80(sp) + fsd fs3,-0x88(sp) + fsd fs4,-0x90(sp) + fsd fs5,-0x98(sp) + fsd fs6,-0xa0(sp) + fsd fs7,-0xa8(sp) + fsd fs8,-0xb0(sp) + fsd fs9,-0xb8(sp) + fsd fs10,-0xc0(sp) + fsd fs11,-0xc8(sp) + addi sp, sp, -0xd0 + + ld t0,-0x10(a0) + sd sp,-0x10(a0) + + // Swap stacks and restore all our callee-saved registers + mv sp,t0 + + fld fs11,0x8(sp) + fld fs10,0x10(sp) + fld fs9,0x18(sp) + fld fs8,0x20(sp) + fld fs7,0x28(sp) + fld fs6,0x30(sp) + fld fs5,0x38(sp) + fld fs4,0x40(sp) + fld fs3,0x48(sp) + fld fs2,0x50(sp) + fld fs1,0x58(sp) + fld fs0,0x60(sp) + ld s11,0x68(sp) + ld s10,0x70(sp) + ld s9,0x78(sp) + ld s8,0x80(sp) + ld s7,0x88(sp) + ld s6,0x90(sp) + ld s5,0x98(sp) + ld s4,0xa0(sp) + ld s3,0xa8(sp) + ld s2,0xb0(sp) + ld s1,0xb8(sp) + ld fp,0xc0(sp) + ld ra,0xc8(sp) + addi sp, sp, 0xd0 + jr ra + ", +); + +// fn( +// top_of_stack(a0): *mut u8, +// entry_point(a1): extern fn(*mut u8, *mut u8), +// entry_arg0(a2): *mut u8, +// ) +#[rustfmt::skip] +asm_func!( + "wasmtime_fiber_init", + " + lla t0,wasmtime_fiber_start + sd t0,-0x18(a0) // ra: the first return address should be wasmtime_fiber_start. + sd a0,-0x20(a0) // fp: initially points at the top of the stack. + sd a1,-0x28(a0) // entry_point will be loaded into s1. + sd a2,-0x30(a0) // entry_arg0 will be loaded into s2. + + // Store the fiber's initial stack pointer where wasmtime_fiber_switch + // expects to find it: 0xe0 below the top of the stack (the 0xd0-byte + // register save area plus the two bookkeeping slots at the top). + addi t0,a0,-0xe0 + sd t0,-0x10(a0) + ret + ", +); + +asm_func!( + "wasmtime_fiber_start", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + + + .cfi_escape 0x0f, /* DW_CFA_def_cfa_expression */ \ + 5, /* the byte length of this expression */ \ + 0x52, /* DW_OP_reg2 (sp) */ \ + 0x06, /* DW_OP_deref */ \ + 0x08, 0xd0, /* DW_OP_const1u 0xd0 */ \ + 0x22 /* DW_OP_plus */ + + + .cfi_rel_offset ra,-0x8 + .cfi_rel_offset fp,-0x10 + .cfi_rel_offset s1,-0x18 + .cfi_rel_offset s2,-0x20 + .cfi_rel_offset s3,-0x28 + .cfi_rel_offset s4,-0x30 + .cfi_rel_offset s5,-0x38 + .cfi_rel_offset s6,-0x40 + .cfi_rel_offset s7,-0x48 + .cfi_rel_offset s8,-0x50 + .cfi_rel_offset s9,-0x58 + .cfi_rel_offset s10,-0x60 + .cfi_rel_offset s11,-0x68 + .cfi_rel_offset fs0,-0x70 + .cfi_rel_offset fs1,-0x78 + .cfi_rel_offset fs2,-0x80 + .cfi_rel_offset fs3,-0x88 + .cfi_rel_offset fs4,-0x90 + .cfi_rel_offset fs5,-0x98 + .cfi_rel_offset fs6,-0xa0 + .cfi_rel_offset fs7,-0xa8 + .cfi_rel_offset fs8,-0xb0 + .cfi_rel_offset fs9,-0xb8 + .cfi_rel_offset fs10,-0xc0 + .cfi_rel_offset fs11,-0xc8 + + mv a0,s2 + mv a1,fp + jalr s1 + // `.4byte 0` is an invalid instruction that traps if we ever return here; + // a safety net, just like in x86_64.rs. + .4byte 0 + .cfi_endproc + ", +); diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index 7eed74511ec6..7250ecd161b4 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -30,8 +30,8 @@ wasmi = "0.11.0" # We rely on precompiled v8 binaries, but rusty-v8 doesn't have a precompiled # binary for MinGW which is built on our CI. It does have one for Windows-msvc, # though, so we could use that if we wanted. For now though just simplify a bit -# and don't depend on this on Windows. The same applies on s390x. -[target.'cfg(not(any(windows, target_arch = "s390x")))'.dependencies] +# and don't depend on this on Windows. The same applies on s390x and riscv64.
+[target.'cfg(not(any(windows, target_arch = "s390x", target_arch = "riscv64")))'.dependencies] v8 = "0.44.3" [dev-dependencies] diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index 98845a19c41e..a49d086a2921 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -31,7 +31,7 @@ use std::time::{Duration, Instant}; use wasmtime::*; use wasmtime_wast::WastContext; -#[cfg(not(any(windows, target_arch = "s390x")))] +#[cfg(not(any(windows, target_arch = "s390x", target_arch = "riscv64")))] mod diff_v8; static CNT: AtomicUsize = AtomicUsize::new(0); diff --git a/crates/fuzzing/src/oracles/engine.rs b/crates/fuzzing/src/oracles/engine.rs index f791dee0247b..977f072321b9 100644 --- a/crates/fuzzing/src/oracles/engine.rs +++ b/crates/fuzzing/src/oracles/engine.rs @@ -24,9 +24,9 @@ pub fn build( #[cfg(not(feature = "fuzz-spec-interpreter"))] "spec" => return Ok(None), - #[cfg(not(any(windows, target_arch = "s390x")))] + #[cfg(not(any(windows, target_arch = "s390x", target_arch = "riscv64")))] "v8" => Box::new(crate::oracles::diff_v8::V8Engine::new(config)), - #[cfg(any(windows, target_arch = "s390x"))] + #[cfg(any(windows, target_arch = "s390x", target_arch = "riscv64"))] "v8" => return Ok(None), _ => panic!("unknown engine {name}"), diff --git a/crates/jit/src/debug.rs b/crates/jit/src/debug.rs index aeb801f49212..06a7ffa1ac2b 100644 --- a/crates/jit/src/debug.rs +++ b/crates/jit/src/debug.rs @@ -97,6 +97,7 @@ fn ensure_supported_elf_format(bytes: &[u8]) -> Result { EM_AARCH64 => (), EM_X86_64 => (), EM_S390 => (), + EM_RISCV => (), machine => { bail!("Unsupported ELF target machine: {:x}", machine); } diff --git a/crates/runtime/src/trampolines.rs b/crates/runtime/src/trampolines.rs index 044b2907ec7b..ee981c900a71 100644 --- a/crates/runtime/src/trampolines.rs +++ b/crates/runtime/src/trampolines.rs @@ -51,6 +51,9 @@ cfg_if::cfg_if! { } else if #[cfg(target_arch = "s390x")] { #[macro_use] mod s390x; + } else if #[cfg(target_arch = "riscv64")] { + #[macro_use] + mod riscv64; } else { compile_error!("unsupported architecture"); } diff --git a/crates/runtime/src/trampolines/riscv64.rs b/crates/runtime/src/trampolines/riscv64.rs new file mode 100644 index 000000000000..950d9b73c980 --- /dev/null +++ b/crates/runtime/src/trampolines/riscv64.rs @@ -0,0 +1,117 @@ +use wasmtime_asm_macros::asm_func; + +#[rustfmt::skip] +asm_func!( + "host_to_wasm_trampoline", + r#" + .cfi_startproc + + // Load the pointer to `VMRuntimeLimits` in `t0`. + ld t0, 8(a1) + + // Check to see if callee is a core `VMContext` (MAGIC == "core"). NB: + // we do not support big-endian riscv64 so the magic value is always + // little-endian encoded. + li t1,0x65726f63 + lwu t3,0(a0) + bne t3,t1,ne + mv t1,sp + j over + ne: + li t1,-1 + over: + // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if this + // was core Wasm, otherwise store an invalid sentinel value.
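+ // (Offset 40 is `last_wasm_entry_sp` within `VMRuntimeLimits`; the offset tests below assert exactly this.)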
+ sd t1,40(t0) + + ld t0,16(a1) + jr t0 + + .cfi_endproc + "# +); + +#[cfg(test)] +mod host_to_wasm_trampoline_offsets_tests { + use wasmtime_environ::{Module, PtrSize, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(40, offsets.ptr.vmruntime_limits_last_wasm_entry_sp()); + assert_eq!(16, offsets.vmctx_callee()); + assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); + } +} + +#[rustfmt::skip] +asm_func!( + "wasm_to_host_trampoline", + " + .cfi_startproc simple + + // Load the pointer to `VMRuntimeLimits` in `t0`. + ld t0,8(a1) + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + sd fp,24(t0) + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + sd ra,32(t0) + + // Tail call to the actual host function. + // + // This *must* be a tail call so that we do not push to the stack and mess + // up the offsets of stack arguments (if any). + ld t0, 8(a0) + jr t0 + .cfi_endproc + ", +); + +#[cfg(test)] +mod wasm_to_host_trampoline_offsets_tests { + use crate::VMHostFuncContext; + use memoffset::offset_of; + use wasmtime_environ::{Module, PtrSize, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.ptr.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.ptr.vmruntime_limits_last_wasm_exit_pc()); + assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); + } +} + +#[rustfmt::skip] +macro_rules! wasm_to_libcall_trampoline { + ($libcall:ident ; $libcall_impl:ident) => { + wasmtime_asm_macros::asm_func!( + stringify!($libcall), + " + .cfi_startproc + + // Load the pointer to `VMRuntimeLimits` in `t0`. + ld t0, 8(a0) + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + sd fp, 24(t0) + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + sd ra, 32(t0) + + // Tail call to the actual implementation of this libcall. + j ", wasmtime_asm_macros::asm_sym!(stringify!($libcall_impl)), " + + .cfi_endproc + " + ); + }; +} diff --git a/crates/runtime/src/traphandlers/backtrace.rs b/crates/runtime/src/traphandlers/backtrace.rs index 0982f36b925b..b18d8b96c374 100644 --- a/crates/runtime/src/traphandlers/backtrace.rs +++ b/crates/runtime/src/traphandlers/backtrace.rs @@ -42,6 +42,9 @@ cfg_if! { } else if #[cfg(target_arch = "s390x")] { mod s390x; use s390x as arch; + } else if #[cfg(target_arch = "riscv64")] { + mod riscv64; + use riscv64 as arch; } else { compile_error!("unsupported architecture"); } diff --git a/crates/runtime/src/traphandlers/backtrace/riscv64.rs b/crates/runtime/src/traphandlers/backtrace/riscv64.rs new file mode 100644 index 000000000000..44badb60506e --- /dev/null +++ b/crates/runtime/src/traphandlers/backtrace/riscv64.rs @@ -0,0 +1,21 @@ +// The return PC of the next older frame is stored one doubleword above the +// current frame pointer. +pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { + *(fp as *mut usize).offset(1) +} + +// And the current frame pointer points to the next older frame pointer. +pub const NEXT_OLDER_FP_FROM_FP_OFFSET: usize = 0; + +pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { + // Calls in riscv64 push two i64s (old FP and return PC), so our entry SP is + // two i64s above the first Wasm FP.
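+ // That is, the entry frame has been reached exactly when first_wasm_sp == fp + 16.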
+ fp == first_wasm_sp - 16 +} + +pub fn assert_entry_sp_is_aligned(sp: usize) { + assert_eq!(sp % 16, 0, "stack should always be aligned to 16"); +} + +pub fn assert_fp_is_aligned(fp: usize) { + assert_eq!(fp % 16, 0, "fp should always be aligned to 16"); +} diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs index 445646e6a580..64dfe58f169a 100644 --- a/crates/runtime/src/traphandlers/unix.rs +++ b/crates/runtime/src/traphandlers/unix.rs @@ -228,7 +228,14 @@ unsafe fn get_pc_and_fp(cx: *mut libc::c_void, _signum: libc::c_int) -> (*const cx.uc_mcontext.mc_rip as *const u8, cx.uc_mcontext.mc_rbp as usize, ) - } else { + } else if #[cfg(all(target_os = "linux", target_arch = "riscv64"))] { + let cx = &*(cx as *const libc::ucontext_t); + ( + cx.uc_mcontext.__gregs[libc::REG_PC] as *const u8, + cx.uc_mcontext.__gregs[libc::REG_S0] as usize, + ) + } else { + compile_error!("unsupported platform"); + } } diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 85c3aeb99bc5..c25dba62b449 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -490,6 +490,17 @@ impl Engine { } } + + #[cfg(target_arch = "riscv64")] + { + enabled = match flag { + // make sure the `test_isa_flags_mismatch` test passes. + "not_a_flag" => None, + // `is_riscv64_feature_detected` is not yet stable, so we cannot + // use it here; assume every other flag is enabled. + _ => Some(true), + } + } + + #[cfg(target_arch = "x86_64")] { enabled = match flag { diff --git a/tests/all/memory.rs b/tests/all/memory.rs index cc3f3de24258..50b5e4a3471f 100644 --- a/tests/all/memory.rs +++ b/tests/all/memory.rs @@ -19,7 +19,7 @@ fn module(engine: &Engine) -> Result { (2, &["i32.load16_s"]), (4, &["i32.load" /*, "f32.load"*/]), (8, &["i64.load" /*, "f64.load"*/]), - #[cfg(not(target_arch = "s390x"))] + #[cfg(not(any(target_arch = "s390x", target_arch = "riscv64")))] (16, &["v128.load"]), ] .iter() diff --git a/tests/host_segfault.rs b/tests/host_segfault.rs index 7a761678b81a..6adac707e5d6 100644 --- a/tests/host_segfault.rs +++ b/tests/host_segfault.rs @@ -144,6 +144,10 @@ fn main() { }, true, ), + #[cfg(not(any(target_arch = "riscv64")))] + // Skipped on riscv64: `InstanceAllocationStrategy::pooling()` tries to reserve + // more than 6000 GiB of address space, which qemu cannot handle. + // https://gitlab.com/qemu-project/qemu/-/issues/1214 + // https://gitlab.com/qemu-project/qemu/-/issues/290 ( "hit async stack guard page with pooling allocator", || { diff --git a/tests/spec_testsuite b/tests/spec_testsuite index e25ae159357c..4fd2339b5e97 160000 --- a/tests/spec_testsuite +++ b/tests/spec_testsuite @@ -1 +1 @@ -Subproject commit e25ae159357c055b3a6fac99043644e208d26d2a +Subproject commit 4fd2339b5e9709e74b326797f69a88b13eac4d47
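The hooks in crates/runtime/src/traphandlers/backtrace/riscv64.rs fully describe a riscv64 frame record: the next older FP is stored at offset 0 from the current FP, the return PC one doubleword above it, and the walk ends at the frame sitting 16 bytes below the recorded entry SP. A minimal sketch of the frame-pointer walk they plug into (a hypothetical driver loop for illustration only, not code from this patch; the real loop lives in the shared traphandlers/backtrace.rs machinery):

// Hypothetical sketch of how the riscv64 backtrace hooks compose.
// `visit` is called with each saved return PC, oldest frame last.
unsafe fn walk_wasm_frames(mut fp: usize, first_wasm_sp: usize, mut visit: impl FnMut(usize)) {
    // Mirrors assert_fp_is_aligned: frame pointers are 16-byte aligned.
    assert_eq!(fp % 16, 0, "fp should always be aligned to 16");
    loop {
        // get_next_older_pc_from_fp: the return PC sits one doubleword
        // above the frame pointer.
        let pc = *(fp as *const usize).offset(1);
        visit(pc);
        // reached_entry_sp: stop once this FP is exactly two i64s below
        // the SP recorded when Wasm was first entered.
        if fp == first_wasm_sp - 16 {
            break;
        }
        // NEXT_OLDER_FP_FROM_FP_OFFSET == 0: the saved FP of the next
        // older frame is stored right at the current frame pointer.
        fp = *(fp as *const usize);
    }
}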