From 7477b61b2416ca130bd3ed9bbc96988e5de17623 Mon Sep 17 00:00:00 2001 From: Tristan Ross Date: Tue, 17 Dec 2024 10:40:22 -0800 Subject: [PATCH 01/35] [libc] Add unistd overlay (#119312) Reverts the revert #119295 of #118882 by expanding #118882 with additional fixes which made CI unhappy. --- libc/hdr/CMakeLists.txt | 3 + libc/hdr/types/CMakeLists.txt | 16 +++++ libc/hdr/types/ssize_t.h | 4 +- libc/hdr/types/uid_t.h | 22 ++++++ libc/hdr/unistd_macros.h | 2 +- libc/hdr/unistd_overlay.h | 69 +++++++++++++++++++ libc/src/unistd/dup.h | 2 +- libc/src/unistd/dup2.h | 2 +- libc/src/unistd/dup3.h | 2 +- libc/src/unistd/fork.h | 3 +- libc/src/unistd/ftruncate.h | 3 +- libc/src/unistd/getcwd.h | 3 +- libc/src/unistd/geteuid.h | 3 +- libc/src/unistd/getopt.h | 2 +- libc/src/unistd/getpid.h | 3 +- libc/src/unistd/getppid.h | 3 +- libc/src/unistd/getuid.h | 3 +- libc/src/unistd/isatty.h | 2 +- libc/src/unistd/link.h | 2 +- libc/src/unistd/linux/CMakeLists.txt | 39 +++++++++++ libc/src/unistd/linux/ftruncate.cpp | 2 +- libc/src/unistd/linux/lseek.cpp | 2 +- libc/src/unistd/linux/sysconf.cpp | 2 +- libc/src/unistd/linux/truncate.cpp | 2 +- libc/src/unistd/lseek.h | 3 +- libc/src/unistd/pread.h | 5 +- libc/src/unistd/pwrite.h | 5 +- libc/src/unistd/read.h | 4 +- libc/src/unistd/readlink.h | 4 +- libc/src/unistd/readlinkat.h | 4 +- libc/src/unistd/swab.h | 2 +- libc/src/unistd/symlink.h | 2 +- libc/src/unistd/symlinkat.h | 2 +- libc/src/unistd/syscall.h | 2 +- libc/src/unistd/sysconf.h | 2 +- libc/src/unistd/truncate.h | 3 +- libc/src/unistd/write.h | 4 +- .../sys/mman/linux/remap_file_pages_test.cpp | 6 +- .../llvm-project-overlay/libc/BUILD.bazel | 65 +++++++++++++++++ 39 files changed, 272 insertions(+), 37 deletions(-) create mode 100644 libc/hdr/types/uid_t.h create mode 100644 libc/hdr/unistd_overlay.h diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 5eb311f4bb229..7f523c50e8694 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ 
-126,10 +126,13 @@ add_proxy_header_library( libc.include.llvm-libc-macros.sys_stat_macros ) +add_header_library(unistd_overlay HDRS unistd_overlay.h) add_proxy_header_library( unistd_macros HDRS unistd_macros.h + DEPENDS + .unistd_overlay FULL_BUILD_DEPENDS libc.include.unistd libc.include.llvm-libc-macros.unistd_macros diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index ce3ecefe36438..5156b58ee11af 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -93,6 +93,14 @@ add_proxy_header_library( libc.include.llvm-libc-types.size_t ) +add_proxy_header_library( + ssize_t + HDRS + ssize_t.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.ssize_t +) + add_proxy_header_library( mode_t HDRS @@ -309,3 +317,11 @@ add_proxy_header_library( libc.include.llvm-libc-types.wint_t libc.include.wchar ) + +add_proxy_header_library( + uid_t + HDRS + uid_t.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.uid_t +) diff --git a/libc/hdr/types/ssize_t.h b/libc/hdr/types/ssize_t.h index 4d2000780ee11..7eff98f33c2bb 100644 --- a/libc/hdr/types/ssize_t.h +++ b/libc/hdr/types/ssize_t.h @@ -14,9 +14,7 @@ #else -#define __need_ssize_t -#include -#undef __need_ssize_t +#include #endif // LIBC_FULL_BUILD diff --git a/libc/hdr/types/uid_t.h b/libc/hdr/types/uid_t.h new file mode 100644 index 0000000000000..a1eefb03228c0 --- /dev/null +++ b/libc/hdr/types/uid_t.h @@ -0,0 +1,22 @@ +//===-- Proxy for uid_t ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_TYPES_UID_T_H +#define LLVM_LIBC_HDR_TYPES_UID_T_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/uid_t.h" + +#else // Overlay mode + +#include + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_UID_T_H diff --git a/libc/hdr/unistd_macros.h b/libc/hdr/unistd_macros.h index 132e123280139..5c2b24354dd3e 100644 --- a/libc/hdr/unistd_macros.h +++ b/libc/hdr/unistd_macros.h @@ -15,7 +15,7 @@ #else // Overlay mode -#include +#include "unistd_overlay.h" #endif // LLVM_LIBC_FULL_BUILD diff --git a/libc/hdr/unistd_overlay.h b/libc/hdr/unistd_overlay.h new file mode 100644 index 0000000000000..e3001e0cda08f --- /dev/null +++ b/libc/hdr/unistd_overlay.h @@ -0,0 +1,69 @@ +//===-- Including unistd.h in overlay mode -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_UNISTD_OVERLAY_H +#define LLVM_LIBC_HDR_UNISTD_OVERLAY_H + +#ifdef LIBC_FULL_BUILD +#error "This header should only be included in overlay mode" +#endif + +// Overlay mode + +// glibc <unistd.h> header might provide extern inline definitions for a few +// functions, causing external alias errors. They are guarded by +// `__USE_EXTERN_INLINES` macro. We temporarily disable `__USE_EXTERN_INLINES` +// macro by defining `__NO_INLINE__` before including <unistd.h>. +// And the same with `__USE_FORTIFY_LEVEL`, which will be temporarily disabled +// with `_FORTIFY_SOURCE`. 
+ +#ifdef _FORTIFY_SOURCE +#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif + +#ifdef __USE_EXTERN_INLINES +#define LIBC_OLD_USE_EXTERN_INLINES +#undef __USE_EXTERN_INLINES +#endif + +#ifdef __USE_FORTIFY_LEVEL +#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL 0 +#endif + +#ifndef __NO_INLINE__ +#define __NO_INLINE__ 1 +#define LIBC_SET_NO_INLINE +#endif + +#include + +#ifdef LIBC_OLD_FORTIFY_SOURCE +#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE +#undef LIBC_OLD_FORTIFY_SOURCE +#endif + +#ifdef LIBC_SET_NO_INLINE +#undef __NO_INLINE__ +#undef LIBC_SET_NO_INLINE +#endif + +#ifdef LIBC_OLD_USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL +#undef LIBC_OLD_USE_FORTIFY_LEVEL +#endif + +#ifdef LIBC_OLD_USE_EXTERN_INLINES +#define __USE_EXTERN_INLINES +#undef LIBC_OLD_USE_EXTERN_INLINES +#endif + +#endif // LLVM_LIBC_HDR_UNISTD_OVERLAY_H diff --git a/libc/src/unistd/dup.h b/libc/src/unistd/dup.h index 63f093c0ee436..57601455acc61 100644 --- a/libc/src/unistd/dup.h +++ b/libc/src/unistd/dup.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_DUP_H #define LLVM_LIBC_SRC_UNISTD_DUP_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/dup2.h b/libc/src/unistd/dup2.h index 060c112daf08f..e2cf62389bca8 100644 --- a/libc/src/unistd/dup2.h +++ b/libc/src/unistd/dup2.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_DUP2_H #define LLVM_LIBC_SRC_UNISTD_DUP2_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/dup3.h b/libc/src/unistd/dup3.h index f3868867123b4..06d9b23dbd200 100644 --- a/libc/src/unistd/dup3.h +++ b/libc/src/unistd/dup3.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_DUP3_H #define LLVM_LIBC_SRC_UNISTD_DUP3_H +#include "hdr/unistd_macros.h" 
#include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/fork.h b/libc/src/unistd/fork.h index b6fd5763b3a5f..a9f8a9795d3a0 100644 --- a/libc/src/unistd/fork.h +++ b/libc/src/unistd/fork.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_FORK_H #define LLVM_LIBC_SRC_UNISTD_FORK_H +#include "hdr/types/pid_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/ftruncate.h b/libc/src/unistd/ftruncate.h index cd8d363727c4a..95901c8b70035 100644 --- a/libc/src/unistd/ftruncate.h +++ b/libc/src/unistd/ftruncate.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_FTRUNCATE_H #define LLVM_LIBC_SRC_UNISTD_FTRUNCATE_H +#include "hdr/types/off_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/getcwd.h b/libc/src/unistd/getcwd.h index 8b63a91c26b5c..3943c0217ec1b 100644 --- a/libc/src/unistd/getcwd.h +++ b/libc/src/unistd/getcwd.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETCWD_H #define LLVM_LIBC_SRC_UNISTD_GETCWD_H +#include "hdr/types/size_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/geteuid.h b/libc/src/unistd/geteuid.h index 9469797bd3d4e..6827266ee81d6 100644 --- a/libc/src/unistd/geteuid.h +++ b/libc/src/unistd/geteuid.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETEUID_H #define LLVM_LIBC_SRC_UNISTD_GETEUID_H +#include "hdr/types/uid_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/getopt.h b/libc/src/unistd/getopt.h index 1be3331dcd98a..0be639d871196 100644 --- a/libc/src/unistd/getopt.h +++ b/libc/src/unistd/getopt.h @@ -10,8 +10,8 @@ #define LLVM_LIBC_SRC_UNISTD_GETOPT_H #include "hdr/types/FILE.h" +#include 
"hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/getpid.h b/libc/src/unistd/getpid.h index c3c55b0c06b10..9e2f156266b92 100644 --- a/libc/src/unistd/getpid.h +++ b/libc/src/unistd/getpid.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETPID_H #define LLVM_LIBC_SRC_UNISTD_GETPID_H +#include "hdr/types/pid_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/getppid.h b/libc/src/unistd/getppid.h index d820791bc06fa..8243fa93eddd1 100644 --- a/libc/src/unistd/getppid.h +++ b/libc/src/unistd/getppid.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETPPID_H #define LLVM_LIBC_SRC_UNISTD_GETPPID_H +#include "hdr/types/pid_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/getuid.h b/libc/src/unistd/getuid.h index dd82c7119d401..f8b3731a9c064 100644 --- a/libc/src/unistd/getuid.h +++ b/libc/src/unistd/getuid.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_GETUID_H #define LLVM_LIBC_SRC_UNISTD_GETUID_H +#include "hdr/types/uid_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/isatty.h b/libc/src/unistd/isatty.h index 6dd1b7b817171..5c8be6541c99c 100644 --- a/libc/src/unistd/isatty.h +++ b/libc/src/unistd/isatty.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_ISATTY_H #define LLVM_LIBC_SRC_UNISTD_ISATTY_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/link.h b/libc/src/unistd/link.h index 9b27aa1accf4e..c1c26c5e0d494 100644 --- a/libc/src/unistd/link.h +++ b/libc/src/unistd/link.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_LINK_H #define LLVM_LIBC_SRC_UNISTD_LINK_H +#include "hdr/unistd_macros.h" #include 
"src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 8a44873141414..ed360c73354ac 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -45,6 +45,7 @@ add_entrypoint_object( HDRS ../dup.h DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -72,6 +73,7 @@ add_entrypoint_object( HDRS ../dup3.h DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -98,6 +100,8 @@ add_entrypoint_object( HDRS ../fork.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.threads.fork_callbacks @@ -166,6 +170,8 @@ add_entrypoint_object( HDRS ../ftruncate.h DEPENDS + libc.hdr.types.off_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -179,6 +185,8 @@ add_entrypoint_object( HDRS ../getcwd.h DEPENDS + libc.hdr.types.size_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -192,6 +200,8 @@ add_entrypoint_object( HDRS ../geteuid.h DEPENDS + libc.hdr.types.uid_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -204,6 +214,8 @@ add_entrypoint_object( HDRS ../getpid.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -216,6 +228,8 @@ add_entrypoint_object( HDRS ../getppid.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -228,6 +242,8 @@ add_entrypoint_object( HDRS ../getuid.h DEPENDS + libc.hdr.types.uid_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -240,6 
+256,7 @@ add_entrypoint_object( HDRS ../isatty.h DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_ioctl libc.include.sys_syscall @@ -282,6 +299,8 @@ add_entrypoint_object( HDRS ../lseek.h DEPENDS + libc.hdr.types.off_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -349,6 +368,10 @@ add_entrypoint_object( HDRS ../pread.h DEPENDS + libc.hdr.types.off_t + libc.hdr.types.size_t + libc.hdr.types.ssize_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -363,6 +386,10 @@ add_entrypoint_object( HDRS ../pwrite.h DEPENDS + libc.hdr.types.off_t + libc.hdr.types.size_t + libc.hdr.types.ssize_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -376,6 +403,9 @@ add_entrypoint_object( HDRS ../read.h DEPENDS + libc.hdr.types.size_t + libc.hdr.types.ssize_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -404,6 +434,8 @@ add_entrypoint_object( HDRS ../readlink.h DEPENDS + libc.hdr.types.size_t + libc.hdr.types.ssize_t libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall @@ -418,6 +450,8 @@ add_entrypoint_object( HDRS ../readlinkat.h DEPENDS + libc.hdr.types.size_t + libc.hdr.types.ssize_t libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall @@ -485,6 +519,8 @@ add_entrypoint_object( HDRS ../truncate.h DEPENDS + libc.hdr.types.off_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -526,6 +562,9 @@ add_entrypoint_object( HDRS ../write.h DEPENDS + libc.hdr.types.size_t + libc.hdr.types.ssize_t + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil diff --git a/libc/src/unistd/linux/ftruncate.cpp b/libc/src/unistd/linux/ftruncate.cpp index 39cb3b5778faa..ccbb0634664aa 100644 --- 
a/libc/src/unistd/linux/ftruncate.cpp +++ b/libc/src/unistd/linux/ftruncate.cpp @@ -11,11 +11,11 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" #include // For uint64_t. #include // For syscall numbers. -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/lseek.cpp b/libc/src/unistd/linux/lseek.cpp index 9486cecf3b123..0e957498da746 100644 --- a/libc/src/unistd/linux/lseek.cpp +++ b/libc/src/unistd/linux/lseek.cpp @@ -14,8 +14,8 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/types/off_t.h" #include // For syscall numbers. -#include // For off_t. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/sysconf.cpp b/libc/src/unistd/linux/sysconf.cpp index 1540eb499ec12..f785ff321c7d7 100644 --- a/libc/src/unistd/linux/sysconf.cpp +++ b/libc/src/unistd/linux/sysconf.cpp @@ -10,11 +10,11 @@ #include "src/__support/common.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" #include "src/sys/auxv/getauxval.h" #include -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/truncate.cpp b/libc/src/unistd/linux/truncate.cpp index 283cf4098cf45..8236edb480d10 100644 --- a/libc/src/unistd/linux/truncate.cpp +++ b/libc/src/unistd/linux/truncate.cpp @@ -13,9 +13,9 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" +#include "hdr/unistd_macros.h" #include // For uint64_t. #include // For syscall numbers. 
-#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/lseek.h b/libc/src/unistd/lseek.h index a8704ec7058dd..e8442738dfbd4 100644 --- a/libc/src/unistd/lseek.h +++ b/libc/src/unistd/lseek.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_LSEEK_H #define LLVM_LIBC_SRC_UNISTD_LSEEK_H +#include "hdr/types/off_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/pread.h b/libc/src/unistd/pread.h index 4723675e82a20..f8b66c548868e 100644 --- a/libc/src/unistd/pread.h +++ b/libc/src/unistd/pread.h @@ -9,8 +9,11 @@ #ifndef LLVM_LIBC_SRC_UNISTD_PREAD_H #define LLVM_LIBC_SRC_UNISTD_PREAD_H +#include "hdr/types/off_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/pwrite.h b/libc/src/unistd/pwrite.h index baffbe48b6437..08ebb89347214 100644 --- a/libc/src/unistd/pwrite.h +++ b/libc/src/unistd/pwrite.h @@ -9,8 +9,11 @@ #ifndef LLVM_LIBC_SRC_UNISTD_PWRITE_H #define LLVM_LIBC_SRC_UNISTD_PWRITE_H +#include "hdr/types/off_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/read.h b/libc/src/unistd/read.h index 01231cb82e35e..5d3527372558b 100644 --- a/libc/src/unistd/read.h +++ b/libc/src/unistd/read.h @@ -9,8 +9,10 @@ #ifndef LLVM_LIBC_SRC_UNISTD_READ_H #define LLVM_LIBC_SRC_UNISTD_READ_H +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/readlink.h b/libc/src/unistd/readlink.h index a73e9740c7463..b63643e2a7018 100644 --- a/libc/src/unistd/readlink.h +++ b/libc/src/unistd/readlink.h @@ -9,8 +9,10 @@ 
#ifndef LLVM_LIBC_SRC_UNISTD_READLINK_H #define LLVM_LIBC_SRC_UNISTD_READLINK_H +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/readlinkat.h b/libc/src/unistd/readlinkat.h index 6bdd48b537fc8..0f5657e45a250 100644 --- a/libc/src/unistd/readlinkat.h +++ b/libc/src/unistd/readlinkat.h @@ -9,8 +9,10 @@ #ifndef LLVM_LIBC_SRC_UNISTD_READLINKAT_H #define LLVM_LIBC_SRC_UNISTD_READLINKAT_H +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/swab.h b/libc/src/unistd/swab.h index caa9c71001097..f6fa3414c43f5 100644 --- a/libc/src/unistd/swab.h +++ b/libc/src/unistd/swab.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_SWAB_H #define LLVM_LIBC_SRC_UNISTD_SWAB_H +#include "hdr/types/ssize_t.h" #include "src/__support/macros/config.h" -#include // For ssize_t namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/symlink.h b/libc/src/unistd/symlink.h index 47f04f8845b46..c743a32a8930f 100644 --- a/libc/src/unistd/symlink.h +++ b/libc/src/unistd/symlink.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_SYMLINK_H #define LLVM_LIBC_SRC_UNISTD_SYMLINK_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/symlinkat.h b/libc/src/unistd/symlinkat.h index 9f8ad517af5a6..6697ce4d537e6 100644 --- a/libc/src/unistd/symlinkat.h +++ b/libc/src/unistd/symlinkat.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_SYMLINKAT_H #define LLVM_LIBC_SRC_UNISTD_SYMLINKAT_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/syscall.h b/libc/src/unistd/syscall.h index db70745719cfe..7f82bd8a452f6 100644 --- 
a/libc/src/unistd/syscall.h +++ b/libc/src/unistd/syscall.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_SYSCALL_H #define LLVM_LIBC_SRC_UNISTD_SYSCALL_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" #include -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/sysconf.h b/libc/src/unistd/sysconf.h index 1b3f39e413508..470c4d846568c 100644 --- a/libc/src/unistd/sysconf.h +++ b/libc/src/unistd/sysconf.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_UNISTD_SYSCONF_H #define LLVM_LIBC_SRC_UNISTD_SYSCONF_H +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/truncate.h b/libc/src/unistd/truncate.h index 9ba5cf8317529..1e1066351953e 100644 --- a/libc/src/unistd/truncate.h +++ b/libc/src/unistd/truncate.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_UNISTD_TRUNCATE_H #define LLVM_LIBC_SRC_UNISTD_TRUNCATE_H +#include "hdr/types/off_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/write.h b/libc/src/unistd/write.h index e40ce19e21769..c5ba6bf719aaf 100644 --- a/libc/src/unistd/write.h +++ b/libc/src/unistd/write.h @@ -9,8 +9,10 @@ #ifndef LLVM_LIBC_SRC_UNISTD_WRITE_H #define LLVM_LIBC_SRC_UNISTD_WRITE_H +#include "hdr/types/size_t.h" +#include "hdr/types/ssize_t.h" +#include "hdr/unistd_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp b/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp index 267f7598ff70e..ebc5c89a1ff57 100644 --- a/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp +++ b/libc/test/src/sys/mman/linux/remap_file_pages_test.cpp @@ -23,7 +23,7 @@ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails; using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; TEST(LlvmLibcRemapFilePagesTest, NoError) { - size_t 
page_size = sysconf(_SC_PAGE_SIZE); + size_t page_size = LIBC_NAMESPACE::sysconf(_SC_PAGE_SIZE); ASSERT_GT(page_size, size_t(0)); // Create a file-backed mapping @@ -53,7 +53,7 @@ TEST(LlvmLibcRemapFilePagesTest, NoError) { } TEST(LlvmLibcRemapFilePagesTest, ErrorInvalidFlags) { - size_t page_size = sysconf(_SC_PAGE_SIZE); + size_t page_size = LIBC_NAMESPACE::sysconf(_SC_PAGE_SIZE); ASSERT_GT(page_size, size_t(0)); // Create a file-backed mapping @@ -81,7 +81,7 @@ TEST(LlvmLibcRemapFilePagesTest, ErrorInvalidFlags) { } TEST(LlvmLibcRemapFilePagesTest, ErrorInvalidAddress) { - size_t page_size = sysconf(_SC_PAGESIZE); + size_t page_size = LIBC_NAMESPACE::sysconf(_SC_PAGESIZE); ASSERT_GT(page_size, size_t(0)); // Use an address that we haven't mapped diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5331a0409d2a2..91c7db9029a66 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -180,6 +180,14 @@ libc_support_library( ], ) +libc_support_library( + name = "hdr_unistd_macros", + hdrs = ["hdr/unistd_macros.h"], + deps = [ + ":hdr_unistd_overlay", + ], +) + libc_support_library( name = "hdr_limits_macros", hdrs = ["hdr/limits_macros.h"], @@ -195,6 +203,11 @@ libc_support_library( hdrs = ["hdr/stdlib_overlay.h"], ) +libc_support_library( + name = "hdr_unistd_overlay", + hdrs = ["hdr/unistd_overlay.h"], +) + ############################ Type Proxy Header Files ########################### libc_support_library( @@ -300,6 +313,11 @@ libc_support_library( hdrs = ["hdr/types/pid_t.h"], ) +libc_support_library( + name = "types_uid_t", + hdrs = ["hdr/types/uid_t.h"], +) + libc_support_library( name = "types_off_t", hdrs = ["hdr/types/off_t.h"], @@ -3996,6 +4014,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", ], ) @@ -4008,6 +4027,7 @@ libc_function( ":__support_osutil_syscall", 
":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", ], ) @@ -4023,6 +4043,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", ], ) @@ -4067,6 +4088,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_off_t", ], ) @@ -4078,6 +4101,8 @@ libc_function( # ":__support_common", # ":__support_osutil_syscall", # ":errno", +# ":hdr_unistd_macros", +# ":types_size_t", # ], # ) @@ -4089,6 +4114,9 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_size_t", + ":types_uid_t", ], ) @@ -4100,6 +4128,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_pid_t", ], ) @@ -4111,6 +4141,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_uid_t", ], ) @@ -4125,6 +4157,7 @@ libc_function( # ":__support_file_file", # ":__support_osutil_syscall", # ":errno", +# ":hdr_unistd_macros", # ], # ) @@ -4136,6 +4169,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", ], ) @@ -4148,6 +4182,7 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", ], ) @@ -4160,6 +4195,7 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", ], ) @@ -4184,6 +4220,8 @@ libc_function( ":__support_file_linux_lseekimpl", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_off_t", ], ) @@ -4200,6 +4238,10 @@ libc_function( ":__support_macros_sanitizer", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_off_t", + ":types_size_t", + ":types_ssize_t", ], ) @@ -4215,6 +4257,10 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_off_t", + ":types_size_t", + ":types_ssize_t", ], ) @@ 
-4228,6 +4274,9 @@ libc_function( ":__support_macros_sanitizer", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_size_t", + ":types_ssize_t", ], ) @@ -4240,6 +4289,9 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", + ":types_size_t", + ":types_ssize_t", ], ) @@ -4252,6 +4304,9 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", + ":types_size_t", + ":types_ssize_t", ], ) @@ -4276,6 +4331,7 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", ], ) @@ -4288,6 +4344,7 @@ libc_function( ":__support_osutil_syscall", ":errno", ":hdr_fcntl_macros", + ":hdr_unistd_macros", ], ) @@ -4302,6 +4359,7 @@ libc_function( # ":__support_common", # ":__support_osutil_syscall", # ":errno", +# ":hdr_unistd_macros", # ], # ) @@ -4313,6 +4371,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_ssize_t", ], ) @@ -4324,6 +4384,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_off_t", ], ) @@ -4360,6 +4422,9 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_unistd_macros", + ":types_size_t", + ":types_ssize_t", ], ) From 5d4e4b35030cd103884b89a184ae02a6e8871388 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Dec 2024 17:57:50 +0000 Subject: [PATCH 02/35] [X86] LowerShift - use getConstant directly to create vector splat constants. NFC. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index efe5c2464dc00..2571873dba848 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30224,8 +30224,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // another. The construction of this mask will be constant folded. // The mask for a logical right shift is nearly identical, the only // difference is that the all ones mask is shifted right instead of left. - SDValue CstFullMask = DAG.getAllOnesConstant(dl, NarrowScalarVT); - SDValue SplatFullMask = DAG.getSplat(VT, dl, CstFullMask); + SDValue SplatFullMask = DAG.getAllOnesConstant(dl, VT); SDValue Mask = DAG.getNode(LogicalOpc, dl, VT, SplatFullMask, Amt); Mask = DAG.getBitcast(WideVT, Mask); // Finally, we mask the shifted vector with the SWAR mask. @@ -30258,9 +30257,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // // This is equal to Masked - 2*SignBitMask which will correctly sign // extend our result. - SDValue CstHighBit = - DAG.getConstant(1ULL << (EltSizeInBits - 1), dl, NarrowScalarVT); - SDValue SplatHighBit = DAG.getSplat(VT, dl, CstHighBit); + SDValue SplatHighBit = + DAG.getConstant(APInt::getSignMask(EltSizeInBits), dl, VT); // This does not induce recursion, all operands are constants. SDValue SignBitMask = DAG.getNode(LogicalOpc, dl, VT, SplatHighBit, Amt); SDValue FlippedSignBit = From 641fbf1524338c86c952ebb1ec8d2b497ada3cef Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 17 Dec 2024 18:49:50 +0000 Subject: [PATCH 03/35] [TySan] Add initial Type Sanitizer runtime (#76261) This patch introduces the runtime components for type sanitizer: a sanitizer for type-based aliasing violations. It is based on Hal Finkel's https://reviews.llvm.org/D32197. 
C/C++ have type-based aliasing rules, and LLVM's optimizer can exploit these given TBAA metadata added by Clang. Roughly, a pointer of given type cannot be used to access an object of a different type (with, of course, certain exceptions). Unfortunately, there's a lot of code in the wild that violates these rules (e.g. for type punning), and such code often must be built with -fno-strict-aliasing. Performance is often sacrificed as a result. Part of the problem is the difficulty of finding TBAA violations. Hopefully, this sanitizer will help. For each TBAA type-access descriptor, encoded in LLVM's IR using metadata, the corresponding instrumentation pass generates descriptor tables. Thus, for each type (and access descriptor), we have a unique pointer representation. Excepting anonymous-namespace types, these tables are comdat, so the pointer values should be unique across the program. The descriptors refer to other descriptors to form a type aliasing tree (just like LLVM's TBAA metadata does). The instrumentation handles the "fast path" (where the types match exactly and no partial-overlaps are detected), and defers to the runtime to handle all of the more-complicated cases. The runtime, of course, is also responsible for reporting errors when those are detected. The runtime uses essentially the same shadow memory region as tsan, and we use 8 bytes of shadow memory, the size of the pointer to the type descriptor, for every byte of accessed data in the program. The value 0 is used to represent an unknown type. The value -1 is used to represent an interior byte (a byte that is part of a type, but not the first byte). The instrumentation first checks for an exact match between the type of the current access and the type for that address recorded in the shadow memory. If it matches, it then checks the shadow for the remainder of the bytes in the type to make sure that they're all -1. If not, we call the runtime. 
If the exact match fails, we next check if the value is 0 (i.e. unknown). If it is, then we check the shadow for the remainder of the bytes in the type (to make sure they're all 0). If they're not, we call the runtime. We then set the shadow for the access address and set the shadow for the remaining bytes in the type to -1 (i.e. marking them as interior bytes). If the type indicated by the shadow memory for the access address is neither an exact match nor 0, we call the runtime. The instrumentation pass inserts calls to the memset intrinsic to set the memory updated by memset, memcpy, and memmove, as well as allocas/byval (and for lifetime.start/end) to reset the shadow memory to reflect that the type is now unknown. The runtime intercepts memset, memcpy, etc. to perform the same function for the library calls. The runtime essentially repeats these checks, but uses the full TBAA algorithm, just as the compiler does, to determine when two types are permitted to alias. In a situation where access overlap has occurred and aliasing is not permitted, an error is generated. As a note, this implementation does not use the compressed shadow-memory scheme discussed previously (http://lists.llvm.org/pipermail/llvm-dev/2017-April/111766.html). That scheme would not handle the struct-path (i.e. structure offset) information that our TBAA represents. I expect we'll want to work further on compressing the shadow-memory representation, but I think it makes sense to do that as follow-up work. This includes build fixes for Linux from Mingjie Xu.
Depends on #76260 (Clang support), #76259 (LLVM support) PR: https://github.com/llvm/llvm-project/pull/76261 --- clang/runtime/CMakeLists.txt | 2 +- .../cmake/Modules/AllSupportedArchDefs.cmake | 1 + compiler-rt/cmake/config-ix.cmake | 15 +- compiler-rt/lib/tysan/CMakeLists.txt | 64 ++++ compiler-rt/lib/tysan/lit.cfg | 35 ++ compiler-rt/lib/tysan/lit.site.cfg.in | 12 + compiler-rt/lib/tysan/tysan.cpp | 346 ++++++++++++++++++ compiler-rt/lib/tysan/tysan.h | 78 ++++ compiler-rt/lib/tysan/tysan.syms.extra | 2 + compiler-rt/lib/tysan/tysan_flags.inc | 17 + compiler-rt/lib/tysan/tysan_interceptors.cpp | 250 +++++++++++++ compiler-rt/lib/tysan/tysan_platform.h | 93 +++++ compiler-rt/test/tysan/CMakeLists.txt | 32 ++ compiler-rt/test/tysan/anon-ns.cpp | 41 +++ compiler-rt/test/tysan/anon-same-struct.c | 26 ++ compiler-rt/test/tysan/anon-struct.c | 27 ++ compiler-rt/test/tysan/basic.c | 65 ++++ compiler-rt/test/tysan/char-memcpy.c | 45 +++ .../test/tysan/constexpr-subobject.cpp | 25 ++ compiler-rt/test/tysan/global.c | 31 ++ compiler-rt/test/tysan/int-long.c | 21 ++ compiler-rt/test/tysan/lit.cfg.py | 147 ++++++++ compiler-rt/test/tysan/lit.site.cfg.py.in | 17 + compiler-rt/test/tysan/ptr-float.c | 19 + ...ruct-offset-multiple-compilation-units.cpp | 51 +++ compiler-rt/test/tysan/struct-offset.c | 26 ++ compiler-rt/test/tysan/struct.c | 39 ++ compiler-rt/test/tysan/union-wr-wr.c | 18 + compiler-rt/test/tysan/violation-pr45282.c | 32 ++ compiler-rt/test/tysan/violation-pr47137.c | 41 +++ compiler-rt/test/tysan/violation-pr51837.c | 34 ++ compiler-rt/test/tysan/violation-pr62544.c | 24 ++ compiler-rt/test/tysan/violation-pr62828.cpp | 44 +++ compiler-rt/test/tysan/violation-pr68655.cpp | 40 ++ compiler-rt/test/tysan/violation-pr86685.c | 29 ++ 35 files changed, 1787 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/lib/tysan/CMakeLists.txt create mode 100644 compiler-rt/lib/tysan/lit.cfg create mode 100644 compiler-rt/lib/tysan/lit.site.cfg.in create mode 100644 
compiler-rt/lib/tysan/tysan.cpp create mode 100644 compiler-rt/lib/tysan/tysan.h create mode 100644 compiler-rt/lib/tysan/tysan.syms.extra create mode 100644 compiler-rt/lib/tysan/tysan_flags.inc create mode 100644 compiler-rt/lib/tysan/tysan_interceptors.cpp create mode 100644 compiler-rt/lib/tysan/tysan_platform.h create mode 100644 compiler-rt/test/tysan/CMakeLists.txt create mode 100644 compiler-rt/test/tysan/anon-ns.cpp create mode 100644 compiler-rt/test/tysan/anon-same-struct.c create mode 100644 compiler-rt/test/tysan/anon-struct.c create mode 100644 compiler-rt/test/tysan/basic.c create mode 100644 compiler-rt/test/tysan/char-memcpy.c create mode 100644 compiler-rt/test/tysan/constexpr-subobject.cpp create mode 100644 compiler-rt/test/tysan/global.c create mode 100644 compiler-rt/test/tysan/int-long.c create mode 100644 compiler-rt/test/tysan/lit.cfg.py create mode 100644 compiler-rt/test/tysan/lit.site.cfg.py.in create mode 100644 compiler-rt/test/tysan/ptr-float.c create mode 100644 compiler-rt/test/tysan/struct-offset-multiple-compilation-units.cpp create mode 100644 compiler-rt/test/tysan/struct-offset.c create mode 100644 compiler-rt/test/tysan/struct.c create mode 100644 compiler-rt/test/tysan/union-wr-wr.c create mode 100644 compiler-rt/test/tysan/violation-pr45282.c create mode 100644 compiler-rt/test/tysan/violation-pr47137.c create mode 100644 compiler-rt/test/tysan/violation-pr51837.c create mode 100644 compiler-rt/test/tysan/violation-pr62544.c create mode 100644 compiler-rt/test/tysan/violation-pr62828.cpp create mode 100644 compiler-rt/test/tysan/violation-pr68655.cpp create mode 100644 compiler-rt/test/tysan/violation-pr86685.c diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt index 65fcdc2868f03..ff2605b23d25b 100644 --- a/clang/runtime/CMakeLists.txt +++ b/clang/runtime/CMakeLists.txt @@ -122,7 +122,7 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/) COMPONENT compiler-rt) # Add top-level 
targets that build specific compiler-rt runtimes. - set(COMPILER_RT_RUNTIMES fuzzer asan builtins dfsan lsan msan profile tsan ubsan ubsan-minimal) + set(COMPILER_RT_RUNTIMES fuzzer asan builtins dfsan lsan msan profile tsan tysan ubsan ubsan-minimal) foreach(runtime ${COMPILER_RT_RUNTIMES}) get_ext_project_build_command(build_runtime_cmd ${runtime}) add_custom_target(${runtime} diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index 5a1e8db61023b..ab5d55a9a35c0 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -85,6 +85,7 @@ else() set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} ${LOONGARCH64} ${RISCV64}) endif() +set(ALL_TYSAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${LOONGARCH64}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 6d52eecc9a91f..cf729c3adb1f5 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -458,6 +458,7 @@ if(APPLE) set(SANITIZER_COMMON_SUPPORTED_OS osx) set(PROFILE_SUPPORTED_OS osx) set(TSAN_SUPPORTED_OS osx) + set(TYSAN_SUPPORTED_OS osx) set(XRAY_SUPPORTED_OS osx) set(FUZZER_SUPPORTED_OS osx) set(ORC_SUPPORTED_OS) @@ -593,6 +594,7 @@ if(APPLE) list(APPEND FUZZER_SUPPORTED_OS ${platform}) list(APPEND ORC_SUPPORTED_OS ${platform}) list(APPEND UBSAN_SUPPORTED_OS ${platform}) + list(APPEND TYSAN_SUPPORTED_OS ${platform}) list(APPEND LSAN_SUPPORTED_OS ${platform}) list(APPEND STATS_SUPPORTED_OS ${platform}) endif() @@ -651,6 +653,9 @@ if(APPLE) list_intersect(CTX_PROFILE_SUPPORTED_ARCH ALL_CTX_PROFILE_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(TYSAN_SUPPORTED_ARCH + ALL_TYSAN_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) 
list_intersect(TSAN_SUPPORTED_ARCH ALL_TSAN_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) @@ -703,6 +708,7 @@ else() filter_available_targets(PROFILE_SUPPORTED_ARCH ${ALL_PROFILE_SUPPORTED_ARCH}) filter_available_targets(CTX_PROFILE_SUPPORTED_ARCH ${ALL_CTX_PROFILE_SUPPORTED_ARCH}) filter_available_targets(TSAN_SUPPORTED_ARCH ${ALL_TSAN_SUPPORTED_ARCH}) + filter_available_targets(TYSAN_SUPPORTED_ARCH ${ALL_TYSAN_SUPPORTED_ARCH}) filter_available_targets(UBSAN_SUPPORTED_ARCH ${ALL_UBSAN_SUPPORTED_ARCH}) filter_available_targets(SAFESTACK_SUPPORTED_ARCH ${ALL_SAFESTACK_SUPPORTED_ARCH}) @@ -748,7 +754,7 @@ if(COMPILER_RT_SUPPORTED_ARCH) endif() message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}") -set(ALL_SANITIZERS asan;rtsan;dfsan;msan;hwasan;tsan;safestack;cfi;scudo_standalone;ubsan_minimal;gwp_asan;nsan;asan_abi) +set(ALL_SANITIZERS asan;rtsan;dfsan;msan;hwasan;tsan;tysan;safestack;cfi;scudo_standalone;ubsan_minimal;gwp_asan;nsan;asan_abi) set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING "sanitizers to build if supported on the target (all;${ALL_SANITIZERS})") list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}") @@ -843,6 +849,13 @@ else() set(COMPILER_RT_HAS_CTX_PROFILE FALSE) endif() +if (COMPILER_RT_HAS_SANITIZER_COMMON AND TYSAN_SUPPORTED_ARCH AND + OS_NAME MATCHES "Linux|Darwin") + set(COMPILER_RT_HAS_TYSAN TRUE) +else() + set(COMPILER_RT_HAS_TYSAN FALSE) +endif() + if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH) if (OS_NAME MATCHES "Linux|Darwin|FreeBSD|NetBSD") set(COMPILER_RT_HAS_TSAN TRUE) diff --git a/compiler-rt/lib/tysan/CMakeLists.txt b/compiler-rt/lib/tysan/CMakeLists.txt new file mode 100644 index 0000000000000..859b67928f004 --- /dev/null +++ b/compiler-rt/lib/tysan/CMakeLists.txt @@ -0,0 +1,64 @@ +include_directories(..) + +# Runtime library sources and build flags. 
+set(TYSAN_SOURCES + tysan.cpp + tysan_interceptors.cpp) +set(TYSAN_COMMON_CFLAGS ${SANITIZER_COMMON_CFLAGS}) +append_rtti_flag(OFF TYSAN_COMMON_CFLAGS) +# Prevent clang from generating libc calls. +append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding TYSAN_COMMON_CFLAGS) + +add_compiler_rt_object_libraries(RTTysan_dynamic + OS ${SANITIZER_COMMON_SUPPORTED_OS} + ARCHS ${TYSAN_SUPPORTED_ARCH} + SOURCES ${TYSAN_SOURCES} + ADDITIONAL_HEADERS ${TYSAN_HEADERS} + CFLAGS ${TYSAN_DYNAMIC_CFLAGS} + DEFS ${TYSAN_DYNAMIC_DEFINITIONS}) + + +# Static runtime library. +add_compiler_rt_component(tysan) + + +if(APPLE) + add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) + + add_compiler_rt_runtime(clang_rt.tysan + SHARED + OS ${SANITIZER_COMMON_SUPPORTED_OS} + ARCHS ${TYSAN_SUPPORTED_ARCH} + OBJECT_LIBS RTTysan_dynamic + RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonSymbolizer + CFLAGS ${TYSAN_DYNAMIC_CFLAGS} + LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} + DEFS ${TYSAN_DYNAMIC_DEFINITIONS} + PARENT_TARGET tysan) + + add_compiler_rt_runtime(clang_rt.tysan_static + STATIC + ARCHS ${TYSAN_SUPPORTED_ARCH} + OBJECT_LIBS RTTysan_static + CFLAGS ${TYSAN_CFLAGS} + DEFS ${TYSAN_COMMON_DEFINITIONS} + PARENT_TARGET tysan) +else() + foreach(arch ${TYSAN_SUPPORTED_ARCH}) + set(TYSAN_CFLAGS ${TYSAN_COMMON_CFLAGS}) + append_list_if(COMPILER_RT_HAS_FPIE_FLAG -fPIE TYSAN_CFLAGS) + add_compiler_rt_runtime(clang_rt.tysan + STATIC + ARCHS ${arch} + SOURCES ${TYSAN_SOURCES} + $<TARGET_OBJECTS:RTInterception.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommon.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}> + $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}> + CFLAGS ${TYSAN_CFLAGS} + PARENT_TARGET tysan) + endforeach() +endif() diff --git a/compiler-rt/lib/tysan/lit.cfg b/compiler-rt/lib/tysan/lit.cfg new file mode 100644 index 0000000000000..e3ef6c9c97147 --- /dev/null +++ b/compiler-rt/lib/tysan/lit.cfg @@ -0,0 +1,35 @@ +# -*- Python -*- + +import os + +# Setup config name. +config.name = 'TypeSanitizer' + getattr(config, 'name_suffix', 'default') + +# Setup source root.
+config.test_source_root = os.path.dirname(__file__) + +# Setup default compiler flags used with -fsanitize=type option. +clang_tysan_cflags = (["-fsanitize=type", + "-mno-omit-leaf-frame-pointer", + "-fno-omit-frame-pointer", + "-fno-optimize-sibling-calls"] + + config.target_cflags + + config.debug_info_flags) +clang_tysan_cxxflags = config.cxx_mode_flags + clang_tysan_cflags + +def build_invocation(compile_flags): + return " " + " ".join([config.clang] + compile_flags) + " " + +config.substitutions.append( ("%clang_tysan ", build_invocation(clang_tysan_cflags)) ) +config.substitutions.append( ("%clangxx_tysan ", build_invocation(clang_tysan_cxxflags)) ) + +# Default test suffixes. +config.suffixes = ['.c', '.cc', '.cpp'] + +# TypeSanitizer tests are currently supported on Linux only. +if config.host_os not in ['Linux']: + config.unsupported = True + +if config.target_arch != 'aarch64': + config.available_features.add('stable-runtime') + diff --git a/compiler-rt/lib/tysan/lit.site.cfg.in b/compiler-rt/lib/tysan/lit.site.cfg.in new file mode 100644 index 0000000000000..673d04e514379 --- /dev/null +++ b/compiler-rt/lib/tysan/lit.site.cfg.in @@ -0,0 +1,12 @@ +@LIT_SITE_CFG_IN_HEADER@ + +# Tool-specific config options. +config.name_suffix = "@TYSAN_TEST_CONFIG_SUFFIX@" +config.target_cflags = "@TYSAN_TEST_TARGET_CFLAGS@" +config.target_arch = "@TYSAN_TEST_TARGET_ARCH@" + +# Load common config for all compiler-rt lit tests. +lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") + +# Load tool-specific config that would do the real work. 
+lit_config.load_config(config, "@TYSAN_LIT_SOURCE_DIR@/lit.cfg") diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp new file mode 100644 index 0000000000000..39d78e7c95e0c --- /dev/null +++ b/compiler-rt/lib/tysan/tysan.cpp @@ -0,0 +1,346 @@ +//===-- tysan.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of TypeSanitizer. +// +// TypeSanitizer runtime. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_report_decorator.h" +#include "sanitizer_common/sanitizer_stacktrace.h" +#include "sanitizer_common/sanitizer_symbolizer.h" + +#include "tysan/tysan.h" + +#include <string.h> + +using namespace __sanitizer; +using namespace __tysan; + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +tysan_set_type_unknown(const void *addr, uptr size) { + if (tysan_inited) + internal_memset(shadow_for(addr), 0, size * sizeof(uptr)); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +tysan_copy_types(const void *daddr, const void *saddr, uptr size) { + if (tysan_inited) + internal_memmove(shadow_for(daddr), shadow_for(saddr), size * sizeof(uptr)); +} + +static const char *getDisplayName(const char *Name) { + if (Name[0] == '\0') + return "<anonymous type>"; + + // Clang generates tags for C++ types that demangle as typeinfo. Remove the + // prefix from the generated string.
+ const char *TIPrefix = "typeinfo name for "; + size_t TIPrefixLen = strlen(TIPrefix); + + const char *DName = Symbolizer::GetOrInit()->Demangle(Name); + if (!internal_strncmp(DName, TIPrefix, TIPrefixLen)) + DName += TIPrefixLen; + + return DName; +} + +static void printTDName(tysan_type_descriptor *td) { + if (((sptr)td) <= 0) { + Printf("<unknown type>"); + return; + } + + switch (td->Tag) { + default: + CHECK(false && "invalid enum value"); + break; + case TYSAN_MEMBER_TD: + printTDName(td->Member.Access); + if (td->Member.Access != td->Member.Base) { + Printf(" (in "); + printTDName(td->Member.Base); + Printf(" at offset %zu)", td->Member.Offset); + } + break; + case TYSAN_STRUCT_TD: + Printf("%s", getDisplayName( + (char *)(td->Struct.Members + td->Struct.MemberCount))); + break; + } +} + +static tysan_type_descriptor *getRootTD(tysan_type_descriptor *TD) { + tysan_type_descriptor *RootTD = TD; + + do { + RootTD = TD; + + if (TD->Tag == TYSAN_STRUCT_TD) { + if (TD->Struct.MemberCount > 0) + TD = TD->Struct.Members[0].Type; + else + TD = nullptr; + } else if (TD->Tag == TYSAN_MEMBER_TD) { + TD = TD->Member.Access; + } else { + CHECK(false && "invalid enum value"); + break; + } + } while (TD); + + return RootTD; +} + +static bool isAliasingLegalUp(tysan_type_descriptor *TDA, + tysan_type_descriptor *TDB, int TDAOffset) { + // Walk up the tree starting with TDA to see if we reach TDB. + uptr OffsetA = 0, OffsetB = 0; + if (TDB->Tag == TYSAN_MEMBER_TD) { + OffsetB = TDB->Member.Offset; + TDB = TDB->Member.Base; + } + + if (TDA->Tag == TYSAN_MEMBER_TD) { + OffsetA = TDA->Member.Offset - TDAOffset; + TDA = TDA->Member.Base; + } + + do { + if (TDA == TDB) + return OffsetA == OffsetB; + + if (TDA->Tag == TYSAN_STRUCT_TD) { + // Reached root type descriptor.
+ if (!TDA->Struct.MemberCount) + break; + + uptr Idx = 0; + for (; Idx < TDA->Struct.MemberCount - 1; ++Idx) { + if (TDA->Struct.Members[Idx].Offset >= OffsetA) + break; + } + + OffsetA -= TDA->Struct.Members[Idx].Offset; + TDA = TDA->Struct.Members[Idx].Type; + } else { + CHECK(false && "invalid enum value"); + break; + } + } while (TDA); + + return false; +} + +static bool isAliasingLegal(tysan_type_descriptor *TDA, + tysan_type_descriptor *TDB, int TDAOffset = 0) { + if (TDA == TDB || !TDB || !TDA) + return true; + + // Aliasing is legal if the two types have different root nodes. + if (getRootTD(TDA) != getRootTD(TDB)) + return true; + + // TDB may have been adjusted by offset TDAOffset in the caller to point to + // the outer type. Check for aliasing with and without adjusting for this + // offset. + return isAliasingLegalUp(TDA, TDB, 0) || isAliasingLegalUp(TDB, TDA, 0) || + isAliasingLegalUp(TDA, TDB, TDAOffset); +} + +namespace __tysan { +class Decorator : public __sanitizer::SanitizerCommonDecorator { +public: + Decorator() : SanitizerCommonDecorator() {} + const char *Warning() { return Red(); } + const char *Name() { return Green(); } + const char *End() { return Default(); } +}; +} // namespace __tysan + +ALWAYS_INLINE +static void reportError(void *Addr, int Size, tysan_type_descriptor *TD, + tysan_type_descriptor *OldTD, const char *AccessStr, + const char *DescStr, int Offset, uptr pc, uptr bp, + uptr sp) { + Decorator d; + Printf("%s", d.Warning()); + Report("ERROR: TypeSanitizer: type-aliasing-violation on address %p" + " (pc %p bp %p sp %p tid %llu)\n", + Addr, (void *)pc, (void *)bp, (void *)sp, GetTid()); + Printf("%s", d.End()); + Printf("%s of size %d at %p with type ", AccessStr, Size, Addr); + + Printf("%s", d.Name()); + printTDName(TD); + Printf("%s", d.End()); + + Printf(" %s of type ", DescStr); + + Printf("%s", d.Name()); + printTDName(OldTD); + Printf("%s", d.End()); + + if (Offset != 0) + Printf(" that starts at offset %d\n", Offset);
+ else + Printf("\n"); + + if (pc) { + + bool request_fast = StackTrace::WillUseFastUnwind(true); + BufferedStackTrace ST; + ST.Unwind(kStackTraceMax, pc, bp, 0, 0, 0, request_fast); + ST.Print(); + } else { + Printf("\n"); + } +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__tysan_check(void *addr, int size, tysan_type_descriptor *td, int flags) { + GET_CALLER_PC_BP_SP; + + bool IsRead = flags & 1; + bool IsWrite = flags & 2; + const char *AccessStr; + if (IsRead && !IsWrite) + AccessStr = "READ"; + else if (!IsRead && IsWrite) + AccessStr = "WRITE"; + else + AccessStr = "ATOMIC UPDATE"; + + tysan_type_descriptor **OldTDPtr = shadow_for(addr); + tysan_type_descriptor *OldTD = *OldTDPtr; + if (((sptr)OldTD) < 0) { + int i = -((sptr)OldTD); + OldTDPtr -= i; + OldTD = *OldTDPtr; + + if (!isAliasingLegal(td, OldTD, i)) + reportError(addr, size, td, OldTD, AccessStr, + "accesses part of an existing object", -i, pc, bp, sp); + + return; + } + + if (!isAliasingLegal(td, OldTD)) { + reportError(addr, size, td, OldTD, AccessStr, "accesses an existing object", + 0, pc, bp, sp); + return; + } + + // These types are allowed to alias (or the stored type is unknown), report + // an error if we find an interior type. + + for (int i = 0; i < size; ++i) { + OldTDPtr = shadow_for((void *)(((uptr)addr) + i)); + OldTD = *OldTDPtr; + if (((sptr)OldTD) >= 0 && !isAliasingLegal(td, OldTD)) + reportError(addr, size, td, OldTD, AccessStr, + "partially accesses an object", i, pc, bp, sp); + } +} + +Flags __tysan::flags_data; + +SANITIZER_INTERFACE_ATTRIBUTE uptr __tysan_shadow_memory_address; +SANITIZER_INTERFACE_ATTRIBUTE uptr __tysan_app_memory_mask; + +#ifdef TYSAN_RUNTIME_VMA +// Runtime detected VMA size. 
+int __tysan::vmaSize; +#endif + +void Flags::SetDefaults() { +#define TYSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "tysan_flags.inc" +#undef TYSAN_FLAG +} + +static void RegisterTySanFlags(FlagParser *parser, Flags *f) { +#define TYSAN_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(parser, #Name, Description, &f->Name); +#include "tysan_flags.inc" +#undef TYSAN_FLAG +} + +static void InitializeFlags() { + SetCommonFlagsDefaults(); + { + CommonFlags cf; + cf.CopyFrom(*common_flags()); + cf.external_symbolizer_path = GetEnv("TYSAN_SYMBOLIZER_PATH"); + OverrideCommonFlags(cf); + } + + flags().SetDefaults(); + + FlagParser parser; + RegisterCommonFlags(&parser); + RegisterTySanFlags(&parser, &flags()); + parser.ParseString(GetEnv("TYSAN_OPTIONS")); + InitializeCommonFlags(); + if (Verbosity()) + ReportUnrecognizedFlags(); + if (common_flags()->help) + parser.PrintFlagDescriptions(); +} + +static void TySanInitializePlatformEarly() { + AvoidCVE_2016_2143(); +#ifdef TYSAN_RUNTIME_VMA + vmaSize = (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); +#if defined(__aarch64__) && !SANITIZER_APPLE + if (vmaSize != 39 && vmaSize != 42 && vmaSize != 48) { + Printf("FATAL: TypeSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %d - Supported 39, 42 and 48\n", vmaSize); + Die(); + } +#endif +#endif + + __sanitizer::InitializePlatformEarly(); + + __tysan_shadow_memory_address = ShadowAddr(); + __tysan_app_memory_mask = AppMask(); +} + +namespace __tysan { +bool tysan_inited = false; +bool tysan_init_is_running; +} // namespace __tysan + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tysan_init() { + CHECK(!tysan_init_is_running); + if (tysan_inited) + return; + tysan_init_is_running = true; + + InitializeFlags(); + TySanInitializePlatformEarly(); + + InitializeInterceptors(); + + if (!MmapFixedNoReserve(ShadowAddr(), AppAddr() - ShadowAddr())) + Die(); + + tysan_init_is_running = false; + tysan_inited = true; +} + 
+#if SANITIZER_CAN_USE_PREINIT_ARRAY +__attribute__((section(".preinit_array"), + used)) static void (*tysan_init_ptr)() = __tysan_init; +#endif diff --git a/compiler-rt/lib/tysan/tysan.h b/compiler-rt/lib/tysan/tysan.h new file mode 100644 index 0000000000000..97df28037b0d2 --- /dev/null +++ b/compiler-rt/lib/tysan/tysan.h @@ -0,0 +1,78 @@ +//===-- tysan.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of TypeSanitizer. +// +// Private TySan header. +//===----------------------------------------------------------------------===// + +#ifndef TYSAN_H +#define TYSAN_H + +#include "sanitizer_common/sanitizer_internal_defs.h" + +using __sanitizer::sptr; +using __sanitizer::u16; +using __sanitizer::uptr; + +#include "tysan_platform.h" + +extern "C" { +void tysan_set_type_unknown(const void *addr, uptr size); +void tysan_copy_types(const void *daddr, const void *saddr, uptr size); +} + +namespace __tysan { +extern bool tysan_inited; +extern bool tysan_init_is_running; + +void InitializeInterceptors(); + +enum { TYSAN_MEMBER_TD = 1, TYSAN_STRUCT_TD = 2 }; + +struct tysan_member_type_descriptor { + struct tysan_type_descriptor *Base; + struct tysan_type_descriptor *Access; + uptr Offset; +}; + +struct tysan_struct_type_descriptor { + uptr MemberCount; + struct { + struct tysan_type_descriptor *Type; + uptr Offset; + } Members[1]; // Tail allocated. 
+}; + +struct tysan_type_descriptor { + uptr Tag; + union { + tysan_member_type_descriptor Member; + tysan_struct_type_descriptor Struct; + }; +}; + +inline tysan_type_descriptor **shadow_for(const void *ptr) { + return (tysan_type_descriptor **)((((uptr)ptr) & AppMask()) * sizeof(ptr) + + ShadowAddr()); +} + +struct Flags { +#define TYSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "tysan_flags.inc" +#undef TYSAN_FLAG + + void SetDefaults(); +}; + +extern Flags flags_data; +inline Flags &flags() { return flags_data; } + +} // namespace __tysan + +#endif // TYSAN_H diff --git a/compiler-rt/lib/tysan/tysan.syms.extra b/compiler-rt/lib/tysan/tysan.syms.extra new file mode 100644 index 0000000000000..04e7854316199 --- /dev/null +++ b/compiler-rt/lib/tysan/tysan.syms.extra @@ -0,0 +1,2 @@ +tysan_* +__tysan_* diff --git a/compiler-rt/lib/tysan/tysan_flags.inc b/compiler-rt/lib/tysan/tysan_flags.inc new file mode 100644 index 0000000000000..98b6591f844ef --- /dev/null +++ b/compiler-rt/lib/tysan/tysan_flags.inc @@ -0,0 +1,17 @@ +//===-- tysan_flags.inc ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TySan runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef TYSAN_FLAG +#error "Define TYSAN_FLAG prior to including this file!" +#endif + +// TYSAN_FLAG(Type, Name, DefaultValue, Description) +// See COMMON_FLAG in sanitizer_flags.inc for more details. 
diff --git a/compiler-rt/lib/tysan/tysan_interceptors.cpp b/compiler-rt/lib/tysan/tysan_interceptors.cpp new file mode 100644 index 0000000000000..5fc6f24412272 --- /dev/null +++ b/compiler-rt/lib/tysan/tysan_interceptors.cpp @@ -0,0 +1,250 @@ +//===-- tysan_interceptors.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of TypeSanitizer. +// +// Interceptors for standard library functions. +//===----------------------------------------------------------------------===// + +#include "interception/interception.h" +#include "sanitizer_common/sanitizer_common.h" +#include "tysan/tysan.h" + +#if SANITIZER_LINUX && !SANITIZER_ANDROID +#define TYSAN_INTERCEPT___STRDUP 1 +#else +#define TYSAN_INTERCEPT___STRDUP 0 +#endif + +#if SANITIZER_LINUX +extern "C" int mallopt(int param, int value); +#endif + +using namespace __sanitizer; +using namespace __tysan; + +static const uptr early_alloc_buf_size = 16384; +static uptr allocated_bytes; +static char early_alloc_buf[early_alloc_buf_size]; + +static bool isInEarlyAllocBuf(const void *ptr) { + return ((uptr)ptr >= (uptr)early_alloc_buf && + ((uptr)ptr - (uptr)early_alloc_buf) < sizeof(early_alloc_buf)); +} + +// Handle allocation requests early (before all interceptors are setup). dlsym, +// for example, calls calloc. 
+static void *handleEarlyAlloc(uptr size) { + void *mem = (void *)&early_alloc_buf[allocated_bytes]; + allocated_bytes += size; + CHECK_LT(allocated_bytes, early_alloc_buf_size); + return mem; +} + +INTERCEPTOR(void *, memset, void *dst, int v, uptr size) { + if (!tysan_inited && REAL(memset) == nullptr) + return internal_memset(dst, v, size); + + void *res = REAL(memset)(dst, v, size); + tysan_set_type_unknown(dst, size); + return res; +} + +INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) { + if (!tysan_inited && REAL(memmove) == nullptr) + return internal_memmove(dst, src, size); + + void *res = REAL(memmove)(dst, src, size); + tysan_copy_types(dst, src, size); + return res; +} + +INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) { + if (!tysan_inited && REAL(memcpy) == nullptr) { + // memmove is used here because on some platforms this will also + // intercept the memmove implementation. + return internal_memmove(dst, src, size); + } + + void *res = REAL(memcpy)(dst, src, size); + tysan_copy_types(dst, src, size); + return res; +} + +INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags, + int fd, OFF_T offset) { + void *res = REAL(mmap)(addr, length, prot, flags, fd, offset); + if (res != (void *)-1) + tysan_set_type_unknown(res, RoundUpTo(length, GetPageSize())); + return res; +} + +#if !SANITIZER_APPLE +INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags, + int fd, OFF64_T offset) { + void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset); + if (res != (void *)-1) + tysan_set_type_unknown(res, RoundUpTo(length, GetPageSize())); + return res; +} +#endif + +INTERCEPTOR(char *, strdup, const char *s) { + char *res = REAL(strdup)(s); + if (res) + tysan_copy_types(res, const_cast<char *>(s), internal_strlen(s)); + return res; +} + +#if TYSAN_INTERCEPT___STRDUP +INTERCEPTOR(char *, __strdup, const char *s) { + char *res = REAL(__strdup)(s); + if (res) + tysan_copy_types(res,
const_cast<char *>(s), internal_strlen(s)); + return res; +} +#endif // TYSAN_INTERCEPT___STRDUP + +INTERCEPTOR(void *, malloc, uptr size) { + if (tysan_init_is_running && REAL(malloc) == nullptr) + return handleEarlyAlloc(size); + + void *res = REAL(malloc)(size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} + +INTERCEPTOR(void *, realloc, void *ptr, uptr size) { + void *res = REAL(realloc)(ptr, size); + // We might want to copy the types from the original allocation (although + // that would require that we knew its size). + if (res) + tysan_set_type_unknown(res, size); + return res; +} + +INTERCEPTOR(void *, calloc, uptr nmemb, uptr size) { + if (tysan_init_is_running && REAL(calloc) == nullptr) + return handleEarlyAlloc(nmemb * size); + + void *res = REAL(calloc)(nmemb, size); + if (res) + tysan_set_type_unknown(res, nmemb * size); + return res; +} + +INTERCEPTOR(void, free, void *p) { + // There are only a few early allocation requests, + // so we simply skip the free. + if (isInEarlyAllocBuf(p)) + return; + REAL(free)(p); +} + +INTERCEPTOR(void *, valloc, uptr size) { + void *res = REAL(valloc)(size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} + +#if SANITIZER_INTERCEPT_MEMALIGN +INTERCEPTOR(void *, memalign, uptr alignment, uptr size) { + void *res = REAL(memalign)(alignment, size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} +#define TYSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign) +#else +#define TYSAN_MAYBE_INTERCEPT_MEMALIGN +#endif // SANITIZER_INTERCEPT_MEMALIGN + +#if SANITIZER_INTERCEPT___LIBC_MEMALIGN +INTERCEPTOR(void *, __libc_memalign, uptr alignment, uptr size) { + void *res = REAL(__libc_memalign)(alignment, size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} +#define TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN \ + INTERCEPT_FUNCTION(__libc_memalign) +#else +#define TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN +#endif // SANITIZER_INTERCEPT___LIBC_MEMALIGN + +#if
SANITIZER_INTERCEPT_PVALLOC +INTERCEPTOR(void *, pvalloc, uptr size) { + void *res = REAL(pvalloc)(size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} +#define TYSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc) +#else +#define TYSAN_MAYBE_INTERCEPT_PVALLOC +#endif // SANITIZER_INTERCEPT_PVALLOC + +#if SANITIZER_INTERCEPT_ALIGNED_ALLOC +INTERCEPTOR(void *, aligned_alloc, uptr alignment, uptr size) { + void *res = REAL(aligned_alloc)(alignment, size); + if (res) + tysan_set_type_unknown(res, size); + return res; +} +#define TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC INTERCEPT_FUNCTION(aligned_alloc) +#else +#define TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC +#endif + +INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) { + int res = REAL(posix_memalign)(memptr, alignment, size); + if (res == 0 && *memptr) + tysan_set_type_unknown(*memptr, size); + return res; +} + +namespace __tysan { +void InitializeInterceptors() { + static int inited = 0; + CHECK_EQ(inited, 0); + + // Instruct libc malloc to consume less memory. 
+#if SANITIZER_LINUX + mallopt(1, 0); // M_MXFAST + mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD +#endif + + INTERCEPT_FUNCTION(mmap); + + INTERCEPT_FUNCTION(mmap64); + + INTERCEPT_FUNCTION(strdup); +#if TYSAN_INTERCEPT___STRDUP + INTERCEPT_FUNCTION(__strdup); +#endif + + INTERCEPT_FUNCTION(malloc); + INTERCEPT_FUNCTION(calloc); + INTERCEPT_FUNCTION(free); + INTERCEPT_FUNCTION(realloc); + INTERCEPT_FUNCTION(valloc); + TYSAN_MAYBE_INTERCEPT_MEMALIGN; + TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN; + TYSAN_MAYBE_INTERCEPT_PVALLOC; + TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC + INTERCEPT_FUNCTION(posix_memalign); + + INTERCEPT_FUNCTION(memset); + INTERCEPT_FUNCTION(memmove); + INTERCEPT_FUNCTION(memcpy); + + inited = 1; +} +} // namespace __tysan diff --git a/compiler-rt/lib/tysan/tysan_platform.h b/compiler-rt/lib/tysan/tysan_platform.h new file mode 100644 index 0000000000000..f01392885d939 --- /dev/null +++ b/compiler-rt/lib/tysan/tysan_platform.h @@ -0,0 +1,93 @@ +//===------------------------ tysan_platform.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of TypeSanitizer. +// +// Platform specific information for TySan. 
+//===----------------------------------------------------------------------===// + +#ifndef TYSAN_PLATFORM_H +#define TYSAN_PLATFORM_H + +namespace __tysan { + +#if defined(__x86_64__) || SANITIZER_APPLE +struct Mapping { + static const uptr kShadowAddr = 0x010000000000ull; + static const uptr kAppAddr = 0x550000000000ull; + static const uptr kAppMemMsk = ~0x780000000000ull; +}; +#elif defined(__aarch64__) +struct Mapping39 { + static const uptr kShadowAddr = 0x0800000000ull; + static const uptr kAppAddr = 0x5500000000ull; + static const uptr kAppMemMsk = ~0x7800000000ull; +}; + +struct Mapping42 { + static const uptr kShadowAddr = 0x10000000000ull; + static const uptr kAppAddr = 0x2aa00000000ull; + static const uptr kAppMemMsk = ~0x3c000000000ull; +}; + +struct Mapping48 { + static const uptr kShadowAddr = 0x0002000000000ull; + static const uptr kAppAddr = 0x0aaaa00000000ull; + static const uptr kAppMemMsk = ~0x0fff800000000ull; +}; +#define TYSAN_RUNTIME_VMA 1 +#else +#error "TySan not supported for this platform!" 
+#endif + +#if TYSAN_RUNTIME_VMA +extern int vmaSize; +#endif + +enum MappingType { MAPPING_SHADOW_ADDR, MAPPING_APP_ADDR, MAPPING_APP_MASK }; + +template <typename Mapping, int Type> uptr MappingImpl(void) { + switch (Type) { + case MAPPING_SHADOW_ADDR: + return Mapping::kShadowAddr; + case MAPPING_APP_ADDR: + return Mapping::kAppAddr; + case MAPPING_APP_MASK: + return Mapping::kAppMemMsk; + } +} + +template <int Type> uptr MappingArchImpl(void) { +#if defined(__aarch64__) && !SANITIZER_APPLE + switch (vmaSize) { + case 39: + return MappingImpl<Mapping39, Type>(); + case 42: + return MappingImpl<Mapping42, Type>(); + case 48: + return MappingImpl<Mapping48, Type>(); + } + DCHECK(0); + return 0; +#else + return MappingImpl<Mapping, Type>(); +#endif +} + +ALWAYS_INLINE +uptr ShadowAddr() { return MappingArchImpl<MAPPING_SHADOW_ADDR>(); } + +ALWAYS_INLINE +uptr AppAddr() { return MappingArchImpl<MAPPING_APP_ADDR>(); } + +ALWAYS_INLINE +uptr AppMask() { return MappingArchImpl<MAPPING_APP_MASK>(); } + +} // namespace __tysan + +#endif diff --git a/compiler-rt/test/tysan/CMakeLists.txt b/compiler-rt/test/tysan/CMakeLists.txt new file mode 100644 index 0000000000000..76f57501e854e --- /dev/null +++ b/compiler-rt/test/tysan/CMakeLists.txt @@ -0,0 +1,32 @@ +set(TYSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +set(TYSAN_TESTSUITES) + +set(TYSAN_TEST_ARCH ${TYSAN_SUPPORTED_ARCH}) +if(APPLE) + darwin_filter_host_archs(TYSAN_SUPPORTED_ARCH TYSAN_TEST_ARCH) +endif() + +foreach(arch ${TYSAN_TEST_ARCH}) + set(TYSAN_TEST_TARGET_ARCH ${arch}) + string(TOLOWER "-${arch}" TYSAN_TEST_CONFIG_SUFFIX) + get_test_cc_for_arch(${arch} TYSAN_TEST_TARGET_CC TYSAN_TEST_TARGET_CFLAGS) + string(TOUPPER ${arch} ARCH_UPPER_CASE) + set(CONFIG_NAME ${ARCH_UPPER_CASE}Config) + + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) + list(APPEND TYSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) +endforeach() + +set(TYSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) +if(NOT COMPILER_RT_STANDALONE_BUILD) + list(APPEND TYSAN_TEST_DEPS tysan) +endif() + 
+add_lit_testsuite(check-tysan "Running the TypeSanitizer tests" + ${TYSAN_TESTSUITES} + DEPENDS ${TYSAN_TEST_DEPS} + ) +set_target_properties(check-tysan PROPERTIES FOLDER "Compiler-RT Misc") diff --git a/compiler-rt/test/tysan/anon-ns.cpp b/compiler-rt/test/tysan/anon-ns.cpp new file mode 100644 index 0000000000000..681304411df31 --- /dev/null +++ b/compiler-rt/test/tysan/anon-ns.cpp @@ -0,0 +1,41 @@ +// RUN: %clangxx_tysan -O0 %s -c -o %t.o +// RUN: %clangxx_tysan -O0 %s -DPMAIN -c -o %tm.o +// RUN: %clangxx_tysan -O0 %t.o %tm.o -o %t +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <iostream> + +// This test demonstrates that the types from anonymous namespaces are +// different in different translation units (while the char* type is the same). + +namespace { +struct X { + X(int i, int j) : a(i), b(j) {} + int a; + int b; +}; +} // namespace + +#ifdef PMAIN +void foo(void *context, int i); +char fbyte(void *context); + +int main() { + X x(5, 6); + foo((void *)&x, 8); + std::cout << "fbyte: " << fbyte((void *)&x) << "\n"; +} +#else +void foo(void *context, int i) { + X *x = (X *)context; + x->b = i; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in (anonymous namespace)::X at offset 4) accesses an existing object of type int (in (anonymous namespace)::X at offset 4) + // CHECK: {{#0 0x.* in foo\(void\*, int\) .*anon-ns.cpp:}}[[@LINE-3]] +} + +char fbyte(void *context) { return *(char *)context; } +#endif + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/anon-same-struct.c b/compiler-rt/test/tysan/anon-same-struct.c new file mode 100644 index 0000000000000..b9044f2a0a73c --- /dev/null +++ b/compiler-rt/test/tysan/anon-same-struct.c @@ -0,0 +1,26 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +// The two anonymous structs are structurally identical. 
As a result, we don't +// report an aliasing violation here. +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation + +typedef struct { + int i1; +} s1; +typedef struct { + int i2; +} s2; + +void f(s1 *s1p, s2 *s2p) { + s1p->i1 = 2; + s2p->i2 = 3; + printf("%i\n", s1p->i1); +} + +int main() { + s1 s = {.i1 = 1}; + f(&s, (s2 *)&s); +} diff --git a/compiler-rt/test/tysan/anon-struct.c b/compiler-rt/test/tysan/anon-struct.c new file mode 100644 index 0000000000000..25f6633545928 --- /dev/null +++ b/compiler-rt/test/tysan/anon-struct.c @@ -0,0 +1,27 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +typedef struct { + int i1, i1b; +} s1; +typedef struct { + int i2, i2b, i2c; +} s2; + +void f(s1 *s1p, s2 *s2p) { + s1p->i1 = 2; + s2p->i2 = 3; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in <anonymous type> at offset 0) accesses an existing object of type int (in <anonymous type> at offset 0) + // CHECK: {{#0 0x.* in f .*anon-struct.c:}}[[@LINE-3]] + printf("%i\n", s1p->i1); +} + +int main() { + s1 s = {.i1 = 1, .i1b = 5}; + f(&s, (s2 *)&s); +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/basic.c b/compiler-rt/test/tysan/basic.c new file mode 100644 index 0000000000000..8e66e1a721383 --- /dev/null +++ b/compiler-rt/test/tysan/basic.c @@ -0,0 +1,65 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t 10 >%t.out.0 2>&1 +// RUN: FileCheck %s < %t.out.0 +// RUN: %clang_tysan -O2 %s -o %t && %run %t 10 >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +void __attribute__((noinline)) add_flt(float *a) { + *a += 2.0f; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 4 at {{.*}} with type float accesses an existing object of type int + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-3]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at 
{{.*}} with type float accesses an existing object of type int + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-6]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 4 at {{.*}} with type float accesses an existing object of type long + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-9]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type long + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-12]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 4 at {{.*}} with type float accesses part of an existing object of type long that starts at offset -4 + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-15]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type float accesses part of an existing object of type long that starts at offset -4 + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-18]] + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 4 at {{.*}} with type float partially accesses an object of type short that starts at offset 2 + // CHECK: {{#0 0x.* in add_flt .*basic.c:}}[[@LINE-21]] +} + +int main(int argc, char *argv[]) { + int x = atoi(argv[1]); + add_flt((float *)&x); + printf("x = %d\n", x); + + long y = x; + add_flt((float *)&y); + printf("y = %ld\n", y); + + add_flt(((float *)&y) + 1); + printf("y = %ld\n", y); + + char *mem = (char *)malloc(4 * sizeof(short)); + memset(mem, 0, 4 * sizeof(short)); + *(short *)(mem + 2) = x; + add_flt((float *)mem); + short s1 = *(short *)mem; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 2 at {{.*}} with type short accesses an existing object of type float + // CHECK: {{#0 0x.* in main .*basic.c:}}[[@LINE-3]] + short s2 = *(short *)(mem + 2); + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 2 at 
{{.*}} with type short accesses part of an existing object of type float that starts at offset -2 + // CHECK: {{#0 0x.* in main .*basic.c:}}[[@LINE-3]] + printf("m[0] = %d, m[1] = %d\n", s1, s2); + free(mem); + + return 0; +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/char-memcpy.c b/compiler-rt/test/tysan/char-memcpy.c new file mode 100644 index 0000000000000..ebbb6b53d0f37 --- /dev/null +++ b/compiler-rt/test/tysan/char-memcpy.c @@ -0,0 +1,45 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out.0 2>&1 +// RUN: FileCheck %s < %t.out.0 +// RUN: %clang_tysan -O2 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +// There's no type-based-aliasing violation here: the memcpy is implemented +// using only char* or unsigned char* (both of which may alias anything). +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation + +void my_memcpy_uchar(void *dest, void *src, int n) { + unsigned char *p = dest, *q = src, *end = p + n; + while (p < end) + *p++ = *q++; +} + +void my_memcpy_char(void *dest, void *src, int n) { + char *p = dest, *q = src, *end = p + n; + while (p < end) + *p++ = *q++; +} + +void test_uchar() { + struct S { + short x; + short *r; + } s = {10, &s.x}, s2; + my_memcpy_uchar(&s2, &s, sizeof(struct S)); + printf("%d\n", *(s2.r)); +} + +void test_char() { + struct S { + short x; + short *r; + } s = {10, &s.x}, s2; + my_memcpy_char(&s2, &s, sizeof(struct S)); + printf("%d\n", *(s2.r)); +} + +int main() { + test_uchar(); + test_char(); +} diff --git a/compiler-rt/test/tysan/constexpr-subobject.cpp b/compiler-rt/test/tysan/constexpr-subobject.cpp new file mode 100644 index 0000000000000..c473ffe5e445b --- /dev/null +++ b/compiler-rt/test/tysan/constexpr-subobject.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck --allow-empty %s < %t.out + +// CHECK-NOT: TypeSanitizer + +int foo() { return 0; } + +struct Bar { + struct S2 
{ + int (*fnA)(); + int (*fnB)(); + }; + + static int x() { return 0; } + + static const S2 &get() { + static constexpr S2 Info = {&foo, &Bar::x}; + return Info; + } +}; + +int main() { + auto Info = Bar::get(); + return Info.fnB(); +} diff --git a/compiler-rt/test/tysan/global.c b/compiler-rt/test/tysan/global.c new file mode 100644 index 0000000000000..247ee768a8162 --- /dev/null +++ b/compiler-rt/test/tysan/global.c @@ -0,0 +1,31 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out +#include <stdlib.h> +#include <string.h> + +float P; +long L; + +int main() { + *(int *)&P = 5; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int accesses an existing object of type float + // CHECK: {{#0 0x.* in main .*global.c:}}[[@LINE-3]] + + void *mem = malloc(sizeof(long)); + *(int *)mem = 6; + memcpy(mem, &L, sizeof(L)); + *(int *)mem = 8; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int accesses an existing object of type long + // CHECK: {{#0 0x.* in main .*global.c:}}[[@LINE-3]] + int r = *(((int *)mem) + 1); + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: READ of size 4 at {{.*}} with type int accesses part of an existing object of type long that starts at offset -4 + // CHECK: {{#0 0x.* in main .*global.c:}}[[@LINE-3]] + free(mem); + + return r; +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/int-long.c b/compiler-rt/test/tysan/int-long.c new file mode 100644 index 0000000000000..b7956c07376e8 --- /dev/null +++ b/compiler-rt/test/tysan/int-long.c @@ -0,0 +1,21 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +long foo(int *x, long *y) { + *x = 0; + *y = 1; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 8 at {{.*}} with type long accesses an existing object of type int 
+ // CHECK: {{#0 0x.* in foo .*int-long.c:}}[[@LINE-3]] + + return *x; +} + +int main(void) { + long l; + printf("%ld\n", foo((int *)&l, &l)); +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/lit.cfg.py b/compiler-rt/test/tysan/lit.cfg.py new file mode 100644 index 0000000000000..817a3f0921e8d --- /dev/null +++ b/compiler-rt/test/tysan/lit.cfg.py @@ -0,0 +1,147 @@ +# -*- Python -*- + +import os +import platform +import re +import shlex + +import lit.formats + + +def get_required_attr(config, attr_name): + attr_value = getattr(config, attr_name, None) + if attr_value is not None: + return attr_value + + lit_config.fatal( + "No attribute %r in test configuration! You may need to run " + "tests from your build directory or add this attribute " + "to lit.site.cfg.py " % attr_name + ) + + +def push_dynamic_library_lookup_path(config, new_path): + if platform.system() == "Windows": + dynamic_library_lookup_var = "PATH" + elif platform.system() == "Darwin": + dynamic_library_lookup_var = "DYLD_LIBRARY_PATH" + else: + dynamic_library_lookup_var = "LD_LIBRARY_PATH" + + new_ld_library_path = os.path.pathsep.join( + (new_path, config.environment.get(dynamic_library_lookup_var, "")) + ) + config.environment[dynamic_library_lookup_var] = new_ld_library_path + + if platform.system() == "FreeBSD": + dynamic_library_lookup_var = "LD_32_LIBRARY_PATH" + new_ld_32_library_path = os.path.pathsep.join( + (new_path, config.environment.get(dynamic_library_lookup_var, "")) + ) + config.environment[dynamic_library_lookup_var] = new_ld_32_library_path + + if platform.system() == "SunOS": + dynamic_library_lookup_var = "LD_LIBRARY_PATH_32" + new_ld_library_path_32 = os.path.pathsep.join( + (new_path, config.environment.get(dynamic_library_lookup_var, "")) + ) + config.environment[dynamic_library_lookup_var] = new_ld_library_path_32 + + dynamic_library_lookup_var = "LD_LIBRARY_PATH_64" + new_ld_library_path_64 = os.path.pathsep.join( + 
(new_path, config.environment.get(dynamic_library_lookup_var, "")) + ) + config.environment[dynamic_library_lookup_var] = new_ld_library_path_64 + + +# Setup config name. +config.name = "TypeSanitizer" + config.name_suffix + +# Platform-specific default TYSAN_OPTIONS for lit tests. +default_tysan_opts = list(config.default_sanitizer_opts) + +default_tysan_opts_str = ":".join(default_tysan_opts) +if default_tysan_opts_str: + config.environment["TYSAN_OPTIONS"] = default_tysan_opts_str + default_tysan_opts_str += ":" +config.substitutions.append( + ("%env_tysan_opts=", "env TYSAN_OPTIONS=" + default_tysan_opts_str) +) + +# Setup source root. +config.test_source_root = os.path.dirname(__file__) + +if config.host_os not in ["FreeBSD", "NetBSD"]: + libdl_flag = "-ldl" +else: + libdl_flag = "" + +# GCC-ASan doesn't link in all the necessary libraries automatically, so +# we have to do it ourselves. +if config.compiler_id == "GNU": + extra_link_flags = ["-pthread", "-lstdc++", libdl_flag] +else: + extra_link_flags = [] + +# Setup default compiler flags used with -fsanitize=address option. +# FIXME: Review the set of required flags and check if it can be reduced. 
+target_cflags = [get_required_attr(config, "target_cflags")] + extra_link_flags +target_cxxflags = config.cxx_mode_flags + target_cflags +clang_tysan_static_cflags = ( + [ + "-fsanitize=type", + "-mno-omit-leaf-frame-pointer", + "-fno-omit-frame-pointer", + "-fno-optimize-sibling-calls", + ] + + config.debug_info_flags + + target_cflags +) +if config.target_arch == "s390x": + clang_tysan_static_cflags.append("-mbackchain") +clang_tysan_static_cxxflags = config.cxx_mode_flags + clang_tysan_static_cflags + +clang_tysan_cflags = clang_tysan_static_cflags +clang_tysan_cxxflags = clang_tysan_static_cxxflags + + +def build_invocation(compile_flags): + return " " + " ".join([config.clang] + compile_flags) + " " + + +config.substitutions.append(("%clang ", build_invocation(target_cflags))) +config.substitutions.append(("%clangxx ", build_invocation(target_cxxflags))) +config.substitutions.append(("%clang_tysan ", build_invocation(clang_tysan_cflags))) +config.substitutions.append(("%clangxx_tysan ", build_invocation(clang_tysan_cxxflags))) + + +# FIXME: De-hardcode this path. +tysan_source_dir = os.path.join( + get_required_attr(config, "compiler_rt_src_root"), "lib", "tysan" +) +python_exec = shlex.quote(get_required_attr(config, "python_executable")) + +# Set LD_LIBRARY_PATH to pick dynamic runtime up properly. +push_dynamic_library_lookup_path(config, config.compiler_rt_libdir) + +# Default test suffixes. +config.suffixes = [".c", ".cpp"] + +if config.host_os == "Darwin": + config.suffixes.append(".mm") + +if config.host_os == "Windows": + config.substitutions.append(("%fPIC", "")) + config.substitutions.append(("%fPIE", "")) + config.substitutions.append(("%pie", "")) +else: + config.substitutions.append(("%fPIC", "-fPIC")) + config.substitutions.append(("%fPIE", "-fPIE")) + config.substitutions.append(("%pie", "-pie")) + +# Only run the tests on supported OSs. 
+if config.host_os not in [ + "Linux", + "Darwin", +]: + config.unsupported = True diff --git a/compiler-rt/test/tysan/lit.site.cfg.py.in b/compiler-rt/test/tysan/lit.site.cfg.py.in new file mode 100644 index 0000000000000..b56dce4fed7a2 --- /dev/null +++ b/compiler-rt/test/tysan/lit.site.cfg.py.in @@ -0,0 +1,17 @@ +@LIT_SITE_CFG_IN_HEADER@ + +# Tool-specific config options. +config.name_suffix = "@TYSAN_TEST_CONFIG_SUFFIX@" +config.target_cflags = "@TYSAN_TEST_TARGET_CFLAGS@" +config.clang = "@TYSAN_TEST_TARGET_CC@" +config.bits = "@TYSAN_TEST_BITS@" +config.arm_thumb = "@COMPILER_RT_ARM_THUMB@" +config.apple_platform = "@TYSAN_TEST_APPLE_PLATFORM@" +config.apple_platform_min_deployment_target_flag = "@TYSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG@" +config.target_arch = "@TYSAN_TEST_TARGET_ARCH@" + +# Load common config for all compiler-rt lit tests. +lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") + +# Load tool-specific config that would do the real work. 
+lit_config.load_config(config, "@TYSAN_LIT_SOURCE_DIR@/lit.cfg.py") diff --git a/compiler-rt/test/tysan/ptr-float.c b/compiler-rt/test/tysan/ptr-float.c new file mode 100644 index 0000000000000..aaa9895986988 --- /dev/null +++ b/compiler-rt/test/tysan/ptr-float.c @@ -0,0 +1,19 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +float *P; +void zero_array() { + int i; + for (i = 0; i < 1; ++i) + P[i] = 0.0f; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type float accesses an existing object of type p1 float + // CHECK: {{#0 0x.* in zero_array .*ptr-float.c:}}[[@LINE-3]] +} + +int main() { + P = (float *)&P; + zero_array(); +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/struct-offset-multiple-compilation-units.cpp b/compiler-rt/test/tysan/struct-offset-multiple-compilation-units.cpp new file mode 100644 index 0000000000000..f7baa14d15aff --- /dev/null +++ b/compiler-rt/test/tysan/struct-offset-multiple-compilation-units.cpp @@ -0,0 +1,51 @@ +// RUN: %clangxx_tysan -O0 %s -c -o %t.o +// RUN: %clangxx_tysan -O0 %s -DPMAIN -c -o %tm.o +// RUN: %clangxx_tysan -O0 %s -DPINIT -c -o %tinit.o +// RUN: %clangxx_tysan -O0 %t.o %tm.o %tinit.o -o %t +// RUN: %run %t 2>&1 | FileCheck %s + +#include <stdio.h> +#include <stdlib.h> + +extern "C" { +typedef struct X { + int *start; + int *end; + int i; +} X; +}; + +#ifdef PMAIN +int foo(struct X *); +void bar(struct X *); +void init(struct X *); + +int main() { + struct X x; + init(&x); + printf("%d\n", foo(&x)); + free(x.start); + return 0; +} + +#elif PINIT + +void init(struct X *x) { + x->start = (int *)calloc(100, sizeof(int)); + x->end = x->start + 99; + x->i = 0; +} + +#else + +__attribute__((noinline)) int foo(struct X *x) { + if (x->start < x->end) + return 30; + return 10; +} + +void bar(struct X *x) { x->end = NULL; } + +#endif + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation 
diff --git a/compiler-rt/test/tysan/struct-offset.c b/compiler-rt/test/tysan/struct-offset.c new file mode 100644 index 0000000000000..7295e0ae121ed --- /dev/null +++ b/compiler-rt/test/tysan/struct-offset.c @@ -0,0 +1,26 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> +#include <stdlib.h> + +struct X { + int i; + int j; +}; + +int foo(struct X *p, struct X *q) { + q->j = 1; + p->i = 0; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in X at offset 0) accesses an existing object of type int (in X at offset 4) + // CHECK: {{#0 0x.* in foo .*struct-offset.c:}}[[@LINE-3]] + return q->j; +} + +int main() { + unsigned char *p = malloc(3 * sizeof(int)); + printf("%i\n", foo((struct X *)(p + sizeof(int)), (struct X *)p)); +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/struct.c b/compiler-rt/test/tysan/struct.c new file mode 100644 index 0000000000000..f7ecef5967624 --- /dev/null +++ b/compiler-rt/test/tysan/struct.c @@ -0,0 +1,39 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +typedef struct S1 { + int i1; +} s1; +typedef struct S2 { + int i2; +} s2; + +void g(int *i) { + *i = 5; + printf("%i\n", *i); +} + +void h(char *c) { + *c = 5; + printf("%i\n", (int)*c); +} + +void f(s1 *s1p, s2 *s2p) { + s1p->i1 = 2; + s2p->i2 = 3; + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation + // CHECK: WRITE of size 4 at {{.*}} with type int (in S2 at offset 0) accesses an existing object of type int (in S1 at offset 0) + // CHECK: {{#0 0x.* in f .*struct.c:}}[[@LINE-3]] + printf("%i\n", s1p->i1); +} + +int main() { + s1 s = {.i1 = 1}; + f(&s, (s2 *)&s); + g(&s.i1); + h((char *)&s.i1); +} + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation diff --git a/compiler-rt/test/tysan/union-wr-wr.c b/compiler-rt/test/tysan/union-wr-wr.c new file mode 100644 
index 0000000000000..6414bbfcf9d95 --- /dev/null +++ b/compiler-rt/test/tysan/union-wr-wr.c @@ -0,0 +1,18 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdio.h> + +// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation + +int main() { + union { + int i; + short s; + } u; + + u.i = 42; + u.s = 1; + + printf("%d\n", u.i); +} diff --git a/compiler-rt/test/tysan/violation-pr45282.c b/compiler-rt/test/tysan/violation-pr45282.c new file mode 100644 index 0000000000000..b3d8b0a6465fd --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr45282.c @@ -0,0 +1,32 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// https://github.com/llvm/llvm-project/issues/45282 + +#include <stdio.h> + +int main(void) { + + double a[29], b[20]; + int i, j; + + for (i = 0; i < 20; ++i) { + b[i] = 2.01f + 1.f; + ((float *)a)[i] = 2.01f * 2.0145f; + ((float *)a + 38)[i] = 2.01f * 1.0123f; + } + + // CHECK: TypeSanitizer: type-aliasing-violation on address + // CHECK-NEXT: WRITE of size 8 at {{.+}} with type double accesses an existing object of type float + // CHECK-NEXT: in main {{.*/?}}violation-pr45282.c:25 + + // loop of problems + for (j = 2; j <= 4; ++j) { + a[j - 1] = ((float *)a)[j] * ((float *)a + 38)[j - 1]; + ((float *)a + 38)[j - 1] = ((float *)a)[j - 1] + b[j - 1]; + } + + printf("((float *)a + 38)[2] = %f\n", ((float *)a + 38)[2]); + + return 0; +} diff --git a/compiler-rt/test/tysan/violation-pr47137.c b/compiler-rt/test/tysan/violation-pr47137.c new file mode 100644 index 0000000000000..fb895ff729de4 --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr47137.c @@ -0,0 +1,41 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// https://github.com/llvm/llvm-project/issues/47137 +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +void f(int m) { + int n = (4 * m + 2) / 3; + uint64_t *a = malloc(n * sizeof(uint64_t)); + uint64_t *b = malloc(n * sizeof(uint64_t)); + 
uint64_t aa[] = {0xffff3e0000000001, 0x22eaf0b680a88c16, 0x5a65d25ac40e20f3, + 0x34e7ac346236953e, 0x9dea3e0a26c6ba89, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + uint64_t bb[] = {0x0000000024c0ffff, 0x000000004634d940, 0x00000000219d18ef, + 0x0000000000154519, 0x000000000000035f, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + char l[20]; + l[0] = 0; + for (int i = 0; i < n; i++) { + a[i] = aa[i] + l[0] - '0'; + b[i] = bb[i] + l[0] - '0'; + } + + // CHECK: TypeSanitizer: type-aliasing-violation on address + // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type long + // CHECK-NEXT: in f {{.*/?}}violation-pr47137.c:31 + for (int i = 0, j = 0; j < 4 * m; i += 4, j += 3) { + for (int k = 0; k < 3; k++) { + ((uint16_t *)a)[j + k] = ((uint16_t *)a)[i + k]; + ((uint16_t *)b)[j + k] = ((uint16_t *)b)[i + k]; + } + } + + printf("a: %016llx\n", a[0]); + free(a); + free(b); +} + +int main() { f(6); } diff --git a/compiler-rt/test/tysan/violation-pr51837.c b/compiler-rt/test/tysan/violation-pr51837.c new file mode 100644 index 0000000000000..d49a813933d65 --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr51837.c @@ -0,0 +1,34 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include <stdint.h> +#include <stdio.h> + +// CHECK-NOT: TypeSanitizer + +union a { + int16_t b; + uint64_t c; +} d; + +uint64_t *e = &d.c; +static uint16_t f(int16_t a, int32_t b, uint64_t c); +static int64_t g(int32_t aa, uint8_t h, union a bb) { + int16_t *i = &d.b; + f(0, h, 0); + *i = h; + return 0; +} +uint16_t f(int16_t a, int32_t b, uint64_t c) { + for (d.c = 0; 0;) + ; + *e = 0; + return 0; +} + +int main() { + uint32_t j = 8; + g(1, j, d); + printf("%d\n", d.b); + return 0; +} diff --git a/compiler-rt/test/tysan/violation-pr62544.c b/compiler-rt/test/tysan/violation-pr62544.c new file mode 100644 index 0000000000000..65dd333272116 --- /dev/null +++ 
b/compiler-rt/test/tysan/violation-pr62544.c @@ -0,0 +1,24 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// https://github.com/llvm/llvm-project/issues/62544 + +int printf(const char *, ...); +int a, b, c; +long d; +int main() { + short *e = &a; + int *f = &a; + *f = 0; + for (; b <= 9; b++) { + int **g = &f; + *f = d; + *g = &c; + } + + // CHECK: TypeSanitizer: type-aliasing-violation on address + // CHECK-NEXT: WRITE of size 2 at {{.+}} with type short accesses an existing object of type int + // CHECK-NEXT: in main {{.*/?}}violation-pr62544.c:22 + *e = 3; + printf("%d\n", a); +} diff --git a/compiler-rt/test/tysan/violation-pr62828.cpp b/compiler-rt/test/tysan/violation-pr62828.cpp new file mode 100644 index 0000000000000..709132c4aba64 --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr62828.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// https://github.com/llvm/llvm-project/issues/62828 +#include + +typedef int int_v8[8]; +typedef short short_v8[8]; +short *test1(int_v8 *cast_c_array, short_v8 *shuf_c_array1, int *ptr) { + int *input1 = reinterpret_cast(((int_v8 *)(cast_c_array))); + short *input2 = reinterpret_cast(reinterpret_cast(input1)); + + short *output1 = reinterpret_cast(((short_v8 *)(shuf_c_array1))); + short *output2 = + reinterpret_cast(reinterpret_cast(output1)); + + for (int r = 0; r < 8; ++r) { + int tmp = (int)((r * 4) + ptr[r]); + if ((ptr[r] / 4) == 0) { + int *input = reinterpret_cast(((int_v8 *)(cast_c_array))); + input[r] = tmp; + } + } + + // CHECK: ERROR: TypeSanitizer: type-aliasing-violation on address + // CHECK-NEXT: READ of size 2 at {{.+}} with type short accesses an existing object of type int + // CHECK-NEXT: in test1(int (*) [8], short (*) [8], int*) {{.*/?}}violation-pr62828.cpp:29 + for (int i3 = 0; i3 < 4; ++i3) { + output2[i3] = input2[(i3 * 2)]; + } + return output2; +} + +int main() { + int_v8 in[4] = 
{{4, 4, 4, 4}}; + short_v8 out[4] = {{0}}; + int ptr[8] = {2}; + test1(in, out, ptr); + short *p = reinterpret_cast(out); + for (int i = 0; i < 32; i++) { + printf("%d ", p[i]); + } + return 0; +} diff --git a/compiler-rt/test/tysan/violation-pr68655.cpp b/compiler-rt/test/tysan/violation-pr68655.cpp new file mode 100644 index 0000000000000..7be05c7a7d4f8 --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr68655.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// https://github.com/llvm/llvm-project/issues/68655 +struct S1 { + long long a; + long long b; +}; + +// CHECK: TypeSanitizer: type-aliasing-violation on address +// CHECK-NEXT: READ of size 4 at {{.+}} with type int accesses an existing object of type long long (in {{.*}}S1 at offset 0) +// CHECK-NEXT: in copyMem(S1*, S1*) {{.*/?}}violation-pr68655.cpp:19 + +void inline copyMem(S1 *dst, S1 *src) { + unsigned *d = reinterpret_cast(dst); + unsigned *s = reinterpret_cast(src); + + for (int i = 0; i < sizeof(S1) / sizeof(unsigned); i++) { + *d = *s; + d++; + s++; + } +} + +void math(S1 *dst, int *srcA, int idx_t) { + S1 zero[4]; + for (int i = 0; i < 2; i++) { + zero[i].a = i + idx_t; + zero[i].b = i * idx_t; + } + + copyMem(&dst[idx_t], &zero[srcA[idx_t]]); +} + +int main() { + S1 dst = {0}; + int Src[2] = {0, 0}; + math(&dst, &Src[0], 0); + return 0; +} diff --git a/compiler-rt/test/tysan/violation-pr86685.c b/compiler-rt/test/tysan/violation-pr86685.c new file mode 100644 index 0000000000000..43b8d478e6851 --- /dev/null +++ b/compiler-rt/test/tysan/violation-pr86685.c @@ -0,0 +1,29 @@ +// RUN: %clang_tysan -O0 %s -o %t && %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include +#include + +// Violation reported in https://github.com/llvm/llvm-project/issues/86685. 
+void foo(int *s, float *f, long n) { + for (long i = 0; i < n; ++i) { + *f = 2; + if (i == 1) + break; + + // CHECK: TypeSanitizer: type-aliasing-violation on address + // CHECK-NEXT: WRITE of size 4 at {{.+}} with type int accesses an existing object of type float + // CHECK-NEXT: #0 {{.+}} in foo {{.*/?}}violation-pr86685.c:17 + *s = 4; + } +} + +int main(void) { + union { + int s; + float f; + } u = {0}; + foo(&u.s, &u.f, 2); + printf("%.f\n", u.f); + return 0; +} From 4c2a46f5fe2eaa41f851ff4ca37dcc8794312542 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Dec 2024 13:48:58 -0500 Subject: [PATCH 04/35] [lld/COFF] Make test/COFF/start-lib.ll use split-file The two input files were only used by this one test, so put them inline. No behavior change. --- lld/test/COFF/Inputs/start-lib1.ll | 13 -------- lld/test/COFF/Inputs/start-lib2.ll | 9 ------ lld/test/COFF/start-lib.ll | 50 +++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 29 deletions(-) delete mode 100644 lld/test/COFF/Inputs/start-lib1.ll delete mode 100644 lld/test/COFF/Inputs/start-lib2.ll diff --git a/lld/test/COFF/Inputs/start-lib1.ll b/lld/test/COFF/Inputs/start-lib1.ll deleted file mode 100644 index 3d4fe19daab9c..0000000000000 --- a/lld/test/COFF/Inputs/start-lib1.ll +++ /dev/null @@ -1,13 +0,0 @@ -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc" - -declare i32 @bar() - -define i32 @foo() { - %1 = call i32 () @bar() - %2 = add i32 %1, 1 - ret i32 %2 -} - -!llvm.linker.options = !{!0} -!0 = !{!"/INCLUDE:foo"} diff --git a/lld/test/COFF/Inputs/start-lib2.ll b/lld/test/COFF/Inputs/start-lib2.ll deleted file mode 100644 index 830ec1d6d191b..0000000000000 --- a/lld/test/COFF/Inputs/start-lib2.ll +++ /dev/null @@ -1,9 +0,0 @@ -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc" - -define i32 @bar() { - ret i32 1 -} - -!llvm.linker.options = !{!0} -!0 = !{!"/INCLUDE:bar"} diff --git 
a/lld/test/COFF/start-lib.ll b/lld/test/COFF/start-lib.ll index ac12db067ffa8..f4f49ed4e4f6e 100644 --- a/lld/test/COFF/start-lib.ll +++ b/lld/test/COFF/start-lib.ll @@ -1,11 +1,14 @@ ; REQUIRES: x86 -; -; RUN: llc -filetype=obj %s -o %t.obj -; RUN: llc -filetype=obj %p/Inputs/start-lib1.ll -o %t1.obj -; RUN: llc -filetype=obj %p/Inputs/start-lib2.ll -o %t2.obj -; RUN: opt -thinlto-bc %s -o %t.bc -; RUN: opt -thinlto-bc %p/Inputs/start-lib1.ll -o %t1.bc -; RUN: opt -thinlto-bc %p/Inputs/start-lib2.ll -o %t2.bc + +; RUN: rm -rf %t.dir +; RUN: split-file %s %t.dir + +; RUN: llc -filetype=obj %t.dir/main.ll -o %t.obj +; RUN: llc -filetype=obj %t.dir/start-lib1.ll -o %t1.obj +; RUN: llc -filetype=obj %t.dir/start-lib2.ll -o %t2.obj +; RUN: opt -thinlto-bc %t.dir/main.ll -o %t.bc +; RUN: opt -thinlto-bc %t.dir/start-lib1.ll -o %t1.bc +; RUN: opt -thinlto-bc %t.dir/start-lib2.ll -o %t2.bc ; ; RUN: lld-link -out:%t1.exe -entry:main -opt:noref -lldmap:%t1.map \ ; RUN: %t.obj %t1.obj %t2.obj @@ -37,9 +40,42 @@ ; TEST3-NOT: {{ }}foo{{$}} ; TEST3-NOT: {{ }}bar{{$}} + +#--- main.ll + target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" define void @main() { ret void } + + +#--- start-lib1.ll + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +declare i32 @bar() + +define i32 @foo() { + %1 = call i32 () @bar() + %2 = add i32 %1, 1 + ret i32 %2 +} + +!llvm.linker.options = !{!0} +!0 = !{!"/INCLUDE:foo"} + + +#--- start-lib2.ll + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define i32 @bar() { + ret i32 1 +} + +!llvm.linker.options = !{!0} +!0 = !{!"/INCLUDE:bar"} From de2acda3dfafc1fb81b1eba1a326272f704e62e6 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Tue, 17 Dec 2024 13:58:01 -0500 Subject: [PATCH 05/35] [AMDGPU][True16][MC] support more VOP3 inst in true16/fake16 format (#113603) Support true16 and fake16 format 
for more VOP3 instructions in MC This patch updates the true16 and fake16 vop_profile for the following instructions and update the asm/dasm tests: v_mad_u16 v_mad_i16 v_med3_f16 v_med3_i16 v_med3_u16 v_max3_f16 v_max3_i16 v_max3_u16 v_min3_f16 v_min3_i16 v_min3_u16 v_med3_num_f16 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 3 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 68 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 16 +- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 882 +++++++---- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 1160 +++++++++----- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 983 +++++++----- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 648 ++++---- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 891 ++++++----- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 787 ++++++---- .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 1378 +++++++++++++---- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 1362 ++++++++++++---- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 1316 ++++++++++++---- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 921 +++++++++-- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 1185 ++++++++++++-- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 1145 ++++++++++++-- 15 files changed, 9364 insertions(+), 3381 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index bc25d75131cc3..0ecd6346707e2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3659,7 +3659,10 @@ multiclass Int16Med3Pat; let SubtargetPredicate = HasMed3_16 in { +let True16Predicate = NotHasTrue16BitInsts in defm : FPMed3Pat; +let True16Predicate = UseFakeTrue16Insts in +defm : FPMed3Pat; } class diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index e9d00eeb91fe1..4c8b5a6bea3b2 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -371,8 +371,8 @@ let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in { } // End 
SubtargetPredicate = isGFX9Only, FPDPRounding = 1 let SubtargetPredicate = isGFX9Plus in { -defm V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; -defm V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; +defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>; +defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>; let OtherPredicates = [isNotGFX90APlus] in def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; } // End SubtargetPredicate = isGFX9Plus @@ -437,16 +437,20 @@ defm: Ternary_i16_Pats; } // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] +multiclass Ternary_i16_Pats_gfx9 { + def : GCNPat < + (op2 (op1 i16:$src0, i16:$src1), i16:$src2), + (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) + >; +} -class Ternary_i16_Pats_gfx9 : GCNPat < - (op2 (op1 i16:$src0, i16:$src1), i16:$src2), - (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) ->; - -let Predicates = [Has16BitInsts, isGFX10Plus] in { -def: Ternary_i16_Pats_gfx9; -} // End Predicates = [Has16BitInsts, isGFX10Plus] +let True16Predicate = UseFakeTrue16Insts in { + defm: Ternary_i16_Pats_gfx9; +} // End True16Predicates = UseFakeTrue16Insts +let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { + defm: Ternary_i16_Pats_gfx9; +} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts class ThreeOpFragSDAG : PatFrag< (ops node:$x, node:$y, node:$z), @@ -616,17 +620,17 @@ let isCommutable = 1, isReMaterializable = 1 in { } // End isCommutable = 1, isReMaterializable = 1 // TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this // to the new src0. 
-defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>; -defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>; -defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>; +defm V_MED3_F16 : VOP3Inst_t16 <"v_med3_f16", VOP_F16_F16_F16_F16, AMDGPUfmed3>; +defm V_MED3_I16 : VOP3Inst_t16 <"v_med3_i16", VOP_I16_I16_I16_I16, AMDGPUsmed3>; +defm V_MED3_U16 : VOP3Inst_t16 <"v_med3_u16", VOP_I16_I16_I16_I16, AMDGPUumed3>; -defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>; -defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>; -defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>; +defm V_MIN3_F16 : VOP3Inst_t16 <"v_min3_f16", VOP_F16_F16_F16_F16, AMDGPUfmin3>; +defm V_MIN3_I16 : VOP3Inst_t16 <"v_min3_i16", VOP_I16_I16_I16_I16, AMDGPUsmin3>; +defm V_MIN3_U16 : VOP3Inst_t16 <"v_min3_u16", VOP_I16_I16_I16_I16, AMDGPUumin3>; -defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>; -defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>; -defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; +defm V_MAX3_F16 : VOP3Inst_t16 <"v_max3_f16", VOP_F16_F16_F16_F16, AMDGPUfmax3>; +defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>; +defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>; let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile, AMDGPUfminimum3>; @@ -1582,7 +1586,7 @@ defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>; defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>; defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>; defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">; -defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">; +defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, 
"v_med3_num_f16", "V_MED3_F16", "v_med3_f16">; defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">; defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">; defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">; @@ -1700,22 +1704,22 @@ defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>; defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>; defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11_gfx12<0x23d>; defm V_XOR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x240>; -defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x241, "V_MAD_U16_gfx9", "v_mad_u16">; +defm V_MAD_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x241, "v_mad_u16", "V_MAD_U16_gfx9">; defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>; defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>; defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>; defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>; defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">; -defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>; -defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24a>; -defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24b>; -defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>; -defm V_MAX3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24d>; -defm V_MAX3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24e>; -defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>; -defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>; -defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>; -defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">; +defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">; +defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">; +defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">; +defm V_MAX3_F16 : 
VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24c, "v_max3_f16">; +defm V_MAX3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24d, "v_max3_i16">; +defm V_MAX3_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24e, "v_max3_u16">; +defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_med3_f16">; +defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">; +defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">; +defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">; defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>; defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 9366e11233571..0f568ba90a9ef 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1906,10 +1906,22 @@ multiclass VOP3_Realtriple_t16_gfx11 op, string asmName, string opName string pseudo_mnemonic = "", bit isSingle = 0> : VOP3_Realtriple_with_name; +multiclass VOP3_Realtriple_t16_and_fake16_gfx11 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> { + defm _t16: VOP3_Realtriple_t16_gfx11; + defm _fake16: VOP3_Realtriple_t16_gfx11; +} + multiclass VOP3Only_Realtriple_t16_gfx11 op, string asmName, string opName = NAME, string pseudo_mnemonic = ""> : VOP3_Realtriple_t16_gfx11; +multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11 op, string asmName, + string opName = NAME, string pseudo_mnemonic = ""> { + defm _t16: VOP3_Realtriple_t16_gfx11; + defm _fake16: VOP3_Realtriple_t16_gfx11; +} + multiclass VOP3be_Real_gfx11 op, string opName, string asmName, bit isSingle = 0> : VOP3be_Real; @@ -1943,8 +1955,8 @@ multiclass VOP3_Realtriple_t16_gfx12 op, string asmName, string 
opName multiclass VOP3_Realtriple_t16_and_fake16_gfx12 op, string asmName, string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> { - defm opName#"_t16":VOP3_Realtriple_t16_gfx12; - defm opName#"_fake16":VOP3_Realtriple_t16_gfx12; + defm _t16:VOP3_Realtriple_t16_gfx12; + defm _fake16:VOP3_Realtriple_t16_gfx12; } multiclass VOP3be_Real_with_name_gfx12 op, string opName, diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index cc75dda110177..c392ff85deef8 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -2744,50 +2744,68 @@ v_lshrrev_b64 v[5:6], src_scc, src_scc v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 // GFX11: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -v_mad_i16 v5, v1, v2, s3 -// GFX11: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +v_mad_i16 v5.l, v1.l, v2.l, s3 +// GFX11: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] -v_mad_i16 v5, v255, s2, s105 -// GFX11: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +v_mad_i16 v5.l, v255.l, s2, s105 +// GFX11: v_mad_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] -v_mad_i16 v5, s1, v255, exec_hi -// GFX11: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +v_mad_i16 v5.l, s1, v255.l, exec_hi +// GFX11: v_mad_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] -v_mad_i16 v5, s105, s105, exec_lo -// GFX11: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_i16 v5.l, s105, s105, exec_lo +// GFX11: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] -v_mad_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: 
[0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] -v_mad_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mad_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] -v_mad_i16 v5, m0, 0.5, m0 -// GFX11: v_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_i16 v5.l, m0, 0.5, m0 +// GFX11: v_mad_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01] -v_mad_i16 v5, exec_lo, -1, vcc_hi -// GFX11: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +v_mad_i16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] -v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +v_mad_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_mad_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] -v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_mad_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_mad_i16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_mad_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mad_i16 
v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] -v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_mad_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03] -v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] -v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX11: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_mad_i16 v255.h, 0xfe0b, vcc_hi, null clamp +// GFX11: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_i16 v5.l, v255.h, s2, s105 +// GFX11: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_i16 v5.l, s1, v255.h, exec_hi +// GFX11: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_mad_i16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_mad_i16 v5.l, exec_hi, null, vcc_lo ; encoding: 
[0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_i16 v5.l, -1, exec_hi, src_scc +// GFX11: v_mad_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_i16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_mad_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03] v_mad_i32_i16 v5, v1, v2, v3 // GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] @@ -2954,50 +2972,68 @@ v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp // GFX11: v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -v_mad_u16 v5, v1, v2, s3 -// GFX11: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +v_mad_u16 v5.l, v1.l, v2.l, s3 +// GFX11: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] + +v_mad_u16 v5.l, v255.l, s2, s105 +// GFX11: v_mad_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_u16 v5.l, s1, v255.l, exec_hi +// GFX11: v_mad_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_u16 v5.l, s105, s105, exec_lo +// GFX11: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] -v_mad_u16 v5, v255, s2, s105 -// GFX11: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] -v_mad_u16 v5, s1, v255, exec_hi -// GFX11: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mad_u16 v5, s105, s105, exec_lo -// GFX11: 
v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] -v_mad_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_u16 v5.l, m0, 0.5, m0 +// GFX11: v_mad_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01] -v_mad_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] -v_mad_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +v_mad_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_mad_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] -v_mad_u16 v5, m0, 0.5, m0 -// GFX11: v_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_u16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_mad_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mad_u16 v5, exec_lo, -1, vcc_hi -// GFX11: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +v_mad_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_mad_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] -v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +v_mad_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_mad_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03] -v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_mad_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] -v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +v_mad_u16 v255.h, 0xfe0b, vcc_hi, null clamp +// GFX11: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_u16 v5.l, v255.h, s2, s105 +// GFX11: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] -v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_u16 v5.l, s1, v255.h, exec_hi +// GFX11: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] -v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX11: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_mad_u16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_mad_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_u16 v5.l, -1, exec_hi, src_scc +// GFX11: v_mad_u16 v5.l, -1, exec_hi, src_scc ; 
encoding: [0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_u16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_mad_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03] v_mad_u32_u16 v5, v1, v2, v3 // GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] @@ -3164,53 +3200,74 @@ v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp // GFX11: v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -v_max3_f16 v5, v1, v2, s3 -// GFX11: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +v_max3_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_max3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_f16 v5.l, v255.l, s2, s105 +// GFX11: v_max3_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_max3_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_f16 v5.l, s105, s105, exec_lo +// GFX11: v_max3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_max3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_max3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] -v_max3_f16 v5, v255, s2, s105 -// GFX11: v_max3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] +v_max3_f16 v5.l, m0, 0.5, m0 +// GFX11: v_max3_f16 v5.l, m0, 0.5, m0 ; encoding: 
[0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] -v_max3_f16 v5, s1, v255, exec_hi -// GFX11: v_max3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] +v_max3_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_max3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] -v_max3_f16 v5, s105, s105, exec_lo -// GFX11: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: v_max3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] -v_max3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_max3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_max3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] -v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +v_max3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_max3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] -v_max3_f16 v5, m0, 0.5, m0 -// GFX11: v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] -v_max3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_max3_f16 v5, |exec_lo|, -1, 
vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX11: v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] +v_max3_f16 v5.l, v255.l, s2, s105 mul:2 +// GFX11: v_max3_f16 v5.l, v255.l, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] -v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: v_max3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_max3_f16 v5.l, v255.h, s2, s105 +// GFX11: v_max3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] -v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] +v_max3_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_max3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] -v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] +v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +v_max3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_max3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] -v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null 
op_sel:[0,0,0,1] clamp -// GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] -v_max3_f16 v5, v255, s2, s105 mul:2 -// GFX11: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +v_max3_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_max3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] + +v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23] v_max3_f32 v5, v1, v2, s3 // GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] @@ -3257,50 +3314,68 @@ v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_max3_i16 v5, v1, v2, s3 -// GFX11: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +v_max3_i16 v5.l, v1.l, v2.l, s3 +// GFX11: v_max3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_i16 v5.l, v255.l, s2, s105 +// GFX11: v_max3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_i16 v5.l, s1, v255.l, exec_hi +// GFX11: v_max3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_i16 v5.l, s105, s105, exec_lo +// GFX11: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] -v_max3_i16 v5, v255, s2, s105 -// GFX11: v_max3_i16 v5, 
v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_max3_i16 v5, s1, v255, exec_hi -// GFX11: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] -v_max3_i16 v5, s105, s105, exec_lo -// GFX11: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_i16 v5.l, m0, 0.5, m0 +// GFX11: v_max3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01] -v_max3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] -v_max3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_max3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] -v_max3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_i16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_max3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_max3_i16 v5, m0, 0.5, m0 -// GFX11: v_max3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] 
-v_max3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +v_max3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_max3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03] -v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] -v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_max3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_i16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_i16 v5.l, v255.h, s2, s105 +// GFX11: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] -v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03] +v_max3_i16 v5.l, s1, v255.h, exec_hi +// GFX11: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] -v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
-v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_i16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_max3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_i16 v5.l, -1, exec_hi, src_scc +// GFX11: v_max3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_i16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_max3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03] v_max3_i32 v5, v1, v2, s3 // GFX11: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] @@ -3347,50 +3422,68 @@ v_max3_i32 v5, src_scc, vcc_lo, -1 v_max3_i32 v255, 0xaf123456, vcc_hi, null // GFX11: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_max3_u16 v5, v1, v2, s3 -// GFX11: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +v_max3_u16 v5.l, v1.l, v2.l, s3 +// GFX11: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_u16 v5.l, v255.l, s2, s105 +// GFX11: v_max3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_u16 v5.l, s1, v255.l, exec_hi +// GFX11: v_max3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] -v_max3_u16 v5, v255, s2, s105 -// GFX11: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +v_max3_u16 v5.l, s105, s105, exec_lo +// GFX11: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] -v_max3_u16 v5, s1, v255, exec_hi -// GFX11: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: 
[0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] -v_max3_u16 v5, s105, s105, exec_lo -// GFX11: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] -v_max3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, m0, 0.5, m0 +// GFX11: v_max3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01] -v_max3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] -v_max3_u16 v5, m0, 0.5, m0 -// GFX11: v_max3_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_max3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] -v_max3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +v_max3_u16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_max3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_u16 v5.l, -1, exec_hi, 
src_scc op_sel:[1,0,0,0] +// GFX11: v_max3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] -v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_max3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_max3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03] -v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] -v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03] +v_max3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_u16 v5.l, v255.h, s2, s105 +// GFX11: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] -v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, s1, v255.h, exec_hi +// GFX11: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_u16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_max3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_u16 v5.l, -1, exec_hi, src_scc +// GFX11: v_max3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_u16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_max3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03] v_max3_u32 v5, v1, v2, s3 // GFX11: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] @@ -3851,53 +3944,77 @@ v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi // GFX11: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_med3_f16 v5, v1, v2, s3 -// GFX11: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +v_med3_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_med3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_f16 v5.l, v255.l, s2, s105 +// GFX11: v_med3_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_med3_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_f16 v5.l, s105, s105, exec_lo +// GFX11: v_med3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_med3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_f16 v5, v255, s2, s105 -// GFX11: v_med3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] +v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_f16 v5, s1, v255, exec_hi -// GFX11: 
v_med3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] +v_med3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_med3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] -v_med3_f16 v5, s105, s105, exec_lo -// GFX11: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_f16 v5.l, m0, 0.5, m0 +// GFX11: v_med3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_med3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] -v_med3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: v_med3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] -v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +v_med3_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_med3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_med3_f16 v5, m0, 0.5, m0 -// GFX11: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] -v_med3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +v_med3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_med3_f16 v5.l, 
0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] -v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] -v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: v_med3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX11: v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +v_med3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 +// GFX11: v_med3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] -v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +v_med3_f16 v5.l, v255.h, s2, s105 +// GFX11: v_med3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] -v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +v_med3_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_med3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] -v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: 
[0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 -// GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +v_med3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_med3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_med3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23] + +v_med3_f16 v5.l, 0.5, -m0, 0.5 div:2 +// GFX11: v_med3_f16 v5.l, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] v_med3_f32 v5, v1, v2, s3 // GFX11: v_med3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] @@ -3944,50 +4061,68 @@ v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_med3_i16 v5, v1, v2, s3 -// GFX11: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +v_med3_i16 v5.l, v1.l, v2.l, s3 +// GFX11: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i16 v5.l, v255.l, s2, s105 +// GFX11: v_med3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i16 v5.l, s1, v255.l, exec_hi +// GFX11: v_med3_i16 v5.l, s1, v255.l, 
exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i16 v5.l, s105, s105, exec_lo +// GFX11: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, v255, s2, s105 -// GFX11: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_i16 v5, s1, v255, exec_hi -// GFX11: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +v_med3_i16 v5.l, m0, 0.5, m0 +// GFX11: v_med3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_i16 v5, s105, s105, exec_lo -// GFX11: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] -v_med3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_med3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_med3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, ttmp15, src_scc, ttmp15 -// 
GFX11: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_i16 v5, m0, 0.5, m0 -// GFX11: v_med3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_med3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_i16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_med3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, v255.h, s2, s105 +// GFX11: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] -v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_i16 v5.l, s1, v255.h, exec_hi +// GFX11: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] -v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_i16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_med3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, -1, exec_hi, src_scc +// GFX11: v_med3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_med3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03] v_med3_i32 v5, v1, v2, s3 // GFX11: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] @@ -4034,50 +4169,68 @@ v_med3_i32 v5, src_scc, vcc_lo, -1 v_med3_i32 v255, 0xaf123456, vcc_hi, null // GFX11: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_med3_u16 v5, v1, v2, s3 -// GFX11: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +v_med3_u16 v5.l, v1.l, v2.l, s3 +// GFX11: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u16 v5.l, v255.l, s2, s105 +// GFX11: v_med3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u16 v5.l, s1, v255.l, exec_hi +// GFX11: v_med3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u16 v5.l, s105, s105, exec_lo +// GFX11: v_med3_u16 v5.l, s105, s105, exec_lo ; encoding: 
[0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_u16 v5, v255, s2, s105 -// GFX11: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, s1, v255, exec_hi -// GFX11: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_u16 v5, s105, s105, exec_lo -// GFX11: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_u16 v5.l, m0, 0.5, m0 +// GFX11: v_med3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] -v_med3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_med3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_u16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_med3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, m0, 0.5, m0 -// GFX11: v_med3_u16 v5, m0, 0.5, 
m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +v_med3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_med3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_med3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_med3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_u16 v5.l, v255.h, s2, s105 +// GFX11: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] -v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_u16 v5.l, s1, v255.h, exec_hi +// GFX11: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] -v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; 
encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_med3_u16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_med3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u16 v5.l, -1, exec_hi, src_scc +// GFX11: v_med3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_med3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03] v_med3_u32 v5, v1, v2, s3 // GFX11: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] @@ -4124,53 +4277,74 @@ v_med3_u32 v5, src_scc, vcc_lo, -1 v_med3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: v_med3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min3_f16 v5, v1, v2, s3 -// GFX11: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +v_min3_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_min3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_f16 v5.l, v255.l, s2, s105 +// GFX11: v_min3_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_min3_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] -v_min3_f16 v5, v255, s2, s105 -// GFX11: v_min3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] +v_min3_f16 v5.l, s105, s105, exec_lo +// GFX11: v_min3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] 
-v_min3_f16 v5, s1, v255, exec_hi -// GFX11: v_min3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] +v_min3_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_min3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_f16 v5, s105, s105, exec_lo -// GFX11: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_min3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] -v_min3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_min3_f16 v5.l, m0, 0.5, m0 +// GFX11: v_min3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +v_min3_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_min3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] -v_min3_f16 v5, m0, 0.5, m0 -// GFX11: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: v_min3_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] -v_min3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +v_min3_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_min3_f16 v5.l, null, exec_lo, 
-|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] -v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: v_min3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_min3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_min3_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] -v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] -v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX11: v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] +v_min3_f16 v5.l, m0, 0.5, m0 clamp mul:4 +// GFX11: v_min3_f16 v5.l, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] -v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] 
clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_min3_f16 v5.l, v255.h, s2, s105 +// GFX11: v_min3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] -v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 -// GFX11: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +v_min3_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_min3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_min3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_min3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43] + +v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23] v_min3_f32 v5, v1, v2, s3 // GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] @@ -4217,50 +4391,68 @@ v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_min3_i16 v5, v1, v2, s3 -// GFX11: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +v_min3_i16 v5.l, v1.l, v2.l, s3 +// GFX11: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i16 v5.l, v255.l, s2, s105 +// GFX11: 
v_min3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i16 v5.l, s1, v255.l, exec_hi +// GFX11: v_min3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i16 v5.l, s105, s105, exec_lo +// GFX11: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_i16 v5, v255, s2, s105 -// GFX11: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, s1, v255, exec_hi -// GFX11: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_i16 v5, s105, s105, exec_lo -// GFX11: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_i16 v5.l, m0, 0.5, m0 +// GFX11: v_min3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] -v_min3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_min3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_min3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_i16 v5, ttmp15, src_scc, ttmp15 
-// GFX11: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +v_min3_i16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_min3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, m0, 0.5, m0 -// GFX11: v_min3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +v_min3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_min3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_min3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_min3_i16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +v_min3_i16 v5.l, v255.h, s2, s105 +// GFX11: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] -v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; 
encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] +v_min3_i16 v5.l, s1, v255.h, exec_hi +// GFX11: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] -v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_min3_i16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_min3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i16 v5.l, -1, exec_hi, src_scc +// GFX11: v_min3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_min3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03] v_min3_i32 v5, v1, v2, s3 // GFX11: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] @@ -4307,50 +4499,68 @@ v_min3_i32 v5, src_scc, vcc_lo, -1 v_min3_i32 v255, 0xaf123456, vcc_hi, null // GFX11: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min3_u16 v5, v1, v2, s3 -// GFX11: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +v_min3_u16 v5.l, v1.l, v2.l, s3 +// GFX11: v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u16 v5.l, v255.l, s2, s105 +// GFX11: v_min3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u16 v5.l, s1, v255.l, exec_hi +// GFX11: v_min3_u16 v5.l, s1, v255.l, 
exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u16 v5.l, s105, s105, exec_lo +// GFX11: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX11: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_u16 v5, v255, s2, s105 -// GFX11: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +v_min3_u16 v5.l, m0, 0.5, m0 +// GFX11: v_min3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_u16 v5, s1, v255, exec_hi -// GFX11: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +v_min3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX11: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] -v_min3_u16 v5, s105, s105, exec_lo -// GFX11: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: v_min3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_u16 v5.l, null, exec_lo, 0xfe0b +// GFX11: v_min3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_min3_u16 v5.l, -1, exec_hi, src_scc 
op_sel:[1,0,0,0] +// GFX11: v_min3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +v_min3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_min3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_u16 v5, m0, 0.5, m0 -// GFX11: v_min3_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +v_min3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX11: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +v_min3_u16 v5.l, v255.h, s2, s105 +// GFX11: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] -v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: v_min3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_min3_u16 v5.l, s1, v255.h, exec_hi +// GFX11: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] -v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; 
encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] +v_min3_u16 v5.l, exec_hi, null, vcc_lo +// GFX11: v_min3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +v_min3_u16 v5.l, -1, exec_hi, src_scc +// GFX11: v_min3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_min3_u16 v5.l, src_scc, vcc_lo, -1 +// GFX11: v_min3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03] v_min3_u32 v5, v1, v2, s3 // GFX11: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 19d1a92b4a2e7..38369c8dcc4d4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1820,47 +1820,74 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
+v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 
fi:1 ; encoding: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -1946,47 +1973,74 @@ v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_mad_u16_e64_dpp 
v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2072,47 +2126,83 @@ v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x03,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_max3_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_max3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_max3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_max3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_max3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] -v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] -v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_f16_e64_dpp 
v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2156,47 +2246,74 @@ v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mas v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// 
GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2240,47 +2357,74 @@ v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] 
-// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, 
v2.l, vcc_lo row_shr:15 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2684,47 +2828,83 @@ v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_med3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_med3_f16_e64_dpp 
v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_med3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_med3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2768,47 +2948,74 @@ v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mas v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 
-v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_med3_i16_e64_dpp 
v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] 
v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2852,47 +3059,74 @@ v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] 
+v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 
ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2936,47 +3170,83 @@ v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, -v1.l, v2.l, 
|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_min3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, 
-|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: 
v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3020,47 +3290,74 @@ v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mas v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 
row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_i16_e64_dpp 
v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 
-v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3104,47 +3401,74 @@ v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 // GFX11: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX11: 
v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] 
+v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX11: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4469,20 +4793,20 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 
-v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] @@ -4490,20 +4814,20 @@ v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo 
op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc 
op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] @@ -4511,140 +4835,140 @@ v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_max3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_max3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
v_max3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo 
op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// 
GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_med3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_med3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, null 
op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 
ed04417601c06..b2fc2c5908498 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -1003,38 +1003,59 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc0,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1108,38 +1129,59 @@ v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
+v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, 
v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc0,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1213,41 
+1255,74 @@ v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x0b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x4c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_max3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x4c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp 
v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x4c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x4c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x4c,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v255.h, 
-|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x4c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_max3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1285,38 +1360,59 @@ v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x1c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp 
v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1354,38 +1450,59 @@ v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max3_u16_e64_dpp v255.h, 
v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1627,41 +1744,74 @@ v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_med3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x4f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_med3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x4f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x4f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x4f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x4f,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x4f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_med3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1699,38 +1849,59 @@ v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x1f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1768,38 +1939,59 @@ v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] 
fi:0 // GFX11: v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 
exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1837,41 +2029,74 @@ v_med3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x21,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x49,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_min3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x49,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x49,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x49,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] 
+ +v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x49,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x49,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_min3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1909,38 +2134,59 @@ v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x19,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + 
+v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] 
-// GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h 
op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_min3_i32_e64_dpp v5, v1, v2, v3 
dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1978,38 +2224,59 @@ v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + 
+v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2826,20 +3093,20 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 
v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] @@ -2847,20 +3114,20 @@ v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null 
op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] @@ -2868,140 +3135,140 @@ v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_max3_f16_e64_dpp v255, -|v255|, 
-|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, 
v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 
-v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, 
-|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 
op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] 
; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 060d0f33cc280..482b58d96c8f1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -2684,50 +2684,62 @@ v_lshrrev_b64 v[5:6], src_scc, src_scc v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 // GFX12: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -v_mad_i16 v5, v1, v2, s3 -// GFX12: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +v_mad_i16 v5.l, v1.l, v2.l, s3 +// GFX12: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] -v_mad_i16 v5, v255, s2, s105 -// GFX12: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +v_mad_i16 v5.l, v255.l, s2, s105 +// GFX12: v_mad_i16 v5.l, v255.l, s2, s105 ; 
encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] -v_mad_i16 v5, s1, v255, exec_hi -// GFX12: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +v_mad_i16 v5.l, s1, v255.l, exec_hi +// GFX12: v_mad_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] -v_mad_i16 v5, s105, s105, exec_lo -// GFX12: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_i16 v5.l, s105, s105, exec_lo +// GFX12: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] -v_mad_i16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] -v_mad_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mad_i16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] -v_mad_i16 v5, m0, 0.5, m0 -// GFX12: v_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_i16 v5.l, m0, 0.5, m0 +// GFX12: v_mad_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01] -v_mad_i16 v5, exec_lo, -1, vcc_hi -// GFX12: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +v_mad_i16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] -v_mad_i16 v5, 
exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +v_mad_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_mad_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] -v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_mad_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_mad_i16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_mad_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] -v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_mad_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03] -v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] -v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX12: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX12: v_mad_i16 v255.h, 0xfe0b, vcc_hi, 
null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_i16 v5.l, v255.h, s2, s105 +// GFX12: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_i16 v5.l, s1, v255.h, exec_hi +// GFX12: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_mad_i16 v255.h, 0xfe0b, vcc_hi, null clamp +// GFX12: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_mad_i32_i16 v5, v1, v2, v3 // GFX12: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] @@ -2894,50 +2906,62 @@ v_mad_co_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc v_mad_co_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp // GFX12: v_mad_co_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -v_mad_u16 v5, v1, v2, s3 -// GFX12: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +v_mad_u16 v5.l, v1.l, v2.l, s3 +// GFX12: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] + +v_mad_u16 v5.l, v255.l, s2, s105 +// GFX12: v_mad_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] -v_mad_u16 v5, v255, s2, s105 -// GFX12: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +v_mad_u16 v5.l, s1, v255.l, exec_hi +// GFX12: v_mad_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] -v_mad_u16 v5, s1, v255, exec_hi -// GFX12: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +v_mad_u16 v5.l, 
s105, s105, exec_lo +// GFX12: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] -v_mad_u16 v5, s105, s105, exec_lo -// GFX12: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] -v_mad_u16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mad_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] -v_mad_u16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +v_mad_u16 v5.l, m0, 0.5, m0 +// GFX12: v_mad_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01] -v_mad_u16 v5, m0, 0.5, m0 -// GFX12: v_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_u16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] -v_mad_u16 v5, exec_lo, -1, vcc_hi -// GFX12: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +v_mad_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_mad_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] -v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] 
+v_mad_u16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_mad_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_mad_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_mad_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] -v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +v_mad_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_mad_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03] -v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] -v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX12: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX12: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5.l, v255.h, s2, s105 +// GFX12: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_u16 v5.l, s1, v255.h, exec_hi +// GFX12: v_mad_u16 v5.l, s1, v255.h, 
exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_mad_u16 v255.h, 0xfe0b, vcc_hi, null clamp +// GFX12: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_mad_u32_u16 v5, v1, v2, v3 // GFX12: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] @@ -3194,50 +3218,62 @@ v_max3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_max3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX12: v_max3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x2a,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_max3_i16 v5, v1, v2, s3 -// GFX12: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +v_max3_i16 v5.l, v1.l, v2.l, s3 +// GFX12: v_max3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_i16 v5.l, v255.l, s2, s105 +// GFX12: v_max3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] -v_max3_i16 v5, v255, s2, s105 -// GFX12: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +v_max3_i16 v5.l, s1, v255.l, exec_hi +// GFX12: v_max3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] -v_max3_i16 v5, s1, v255, exec_hi -// GFX12: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +v_max3_i16 v5.l, s105, s105, exec_lo +// GFX12: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] -v_max3_i16 v5, s105, s105, exec_lo -// GFX12: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_max3_i16 v5.l, vcc_lo, 
ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] -v_max3_i16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_max3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] -v_max3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_i16 v5.l, m0, 0.5, m0 +// GFX12: v_max3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01] -v_max3_i16 v5, m0, 0.5, m0 -// GFX12: v_max3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] -v_max3_i16 v5, exec_lo, -1, vcc_hi -// GFX12: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +v_max3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_max3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] -v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_i16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_max3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_max3_i16 v5, null, exec_lo, 0xfe0b ; encoding: 
[0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] -v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_max3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03] -v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03] +v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] -v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_i16 v5.l, v255.h, s2, s105 +// GFX12: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_i16 v5.l, s1, v255.h, exec_hi +// GFX12: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_i16 v255.h, 0xfe0b, vcc_hi, 
null +// GFX12: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_max3_i32 v5, v1, v2, s3 // GFX12: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] @@ -3284,50 +3320,62 @@ v_max3_i32 v5, src_scc, vcc_lo, -1 v_max3_i32 v255, 0xaf123456, vcc_hi, null // GFX12: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_max3_u16 v5, v1, v2, s3 -// GFX12: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +v_max3_u16 v5.l, v1.l, v2.l, s3 +// GFX12: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_u16 v5.l, v255.l, s2, s105 +// GFX12: v_max3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_u16 v5.l, s1, v255.l, exec_hi +// GFX12: v_max3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] -v_max3_u16 v5, v255, s2, s105 -// GFX12: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +v_max3_u16 v5.l, s105, s105, exec_lo +// GFX12: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] -v_max3_u16 v5, s1, v255, exec_hi -// GFX12: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] -v_max3_u16 v5, s105, s105, exec_lo -// GFX12: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] 
+v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] -v_max3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, m0, 0.5, m0 +// GFX12: v_max3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01] -v_max3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] -v_max3_u16 v5, m0, 0.5, m0 -// GFX12: v_max3_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_max3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] -v_max3_u16 v5, exec_lo, -1, vcc_hi -// GFX12: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +v_max3_u16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_max3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_max3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] -v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_max3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_max3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03] -v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] -v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03] +v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_u16 v5.l, v255.h, s2, s105 +// GFX12: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] -v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_u16 v5.l, s1, v255.h, exec_hi +// GFX12: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX12: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_max3_u32 v5, v1, v2, s3 // GFX12: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] @@ -3752,50 +3800,62 @@ v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo v_mbcnt_lo_u32_b32 v255, 
0xaf123456, vcc_hi // GFX12: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_med3_num_f16 v5, v1, v2, s3 -// GFX12: v_med3_num_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] +v_med3_num_f16 v5.l, v1.l, v2.l, s3 +// GFX12: v_med3_num_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_num_f16 v5.l, v255.l, s2, s105 +// GFX12: v_med3_num_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_num_f16 v5.l, s1, v255.l, exec_hi +// GFX12: v_med3_num_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] -v_med3_num_f16 v5, v255, s2, s105 -// GFX12: v_med3_num_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] +v_med3_num_f16 v5.l, s105, s105, exec_lo +// GFX12: v_med3_num_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_num_f16 v5, s1, v255, exec_hi -// GFX12: v_med3_num_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] +v_med3_num_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_med3_num_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_num_f16 v5, s105, s105, exec_lo -// GFX12: v_med3_num_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_num_f16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_med3_num_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_num_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: v_med3_num_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] -v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_med3_num_f16 v5, vcc_hi, 0xfe0b, 
v255 ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_num_f16 v5.l, m0, 0.5, m0 +// GFX12: v_med3_num_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX12: v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] +v_med3_num_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX12: v_med3_num_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] -v_med3_num_f16 v5, m0, 0.5, m0 -// GFX12: v_med3_num_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_num_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: v_med3_num_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] -v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi -// GFX12: v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] +v_med3_num_f16 v5.l, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: v_med3_num_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX12: v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] +v_med3_num_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: v_med3_num_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] -v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX12: v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_med3_num_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_med3_num_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] -v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| 
op_sel:[1,0,0,0] -// GFX12: v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] +v_med3_num_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: v_med3_num_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] -v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] +v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX12: v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] +v_med3_num_f16 v5.l, v255.h, s2, s105 +// GFX12: v_med3_num_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x32,0xd6,0xff,0x05,0xa4,0x01] -v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX12: v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_med3_num_f16 v5.l, s1, v255.h, exec_hi +// GFX12: v_med3_num_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX12: v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] v_med3_num_f32 v5, v1, v2, s3 // GFX12: v_med3_num_f32 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x00] @@ -3842,50 +3902,62 @@ v_med3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_med3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX12: v_med3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x31,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_med3_i16 v5, v1, v2, s3 -// GFX12: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +v_med3_i16 v5.l, v1.l, v2.l, s3 +// GFX12: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i16 v5.l, v255.l, s2, s105 +// GFX12: v_med3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i16 v5.l, s1, v255.l, exec_hi +// GFX12: v_med3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i16 v5.l, s105, s105, exec_lo +// GFX12: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_i16 v5, v255, s2, s105 -// GFX12: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_i16 v5, s1, v255, exec_hi -// GFX12: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, s105, s105, exec_lo -// GFX12: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_i16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_i16 v5.l, m0, 
0.5, m0 +// GFX12: v_med3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] -v_med3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_med3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_i16 v5, m0, 0.5, m0 -// GFX12: v_med3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_i16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_med3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, exec_lo, -1, vcc_hi -// GFX12: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_med3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_med3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_i16 v5.l, v255.h, s2, s105 +// GFX12: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] -v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_i16 v5.l, s1, v255.h, exec_hi +// GFX12: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] -v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v255.h, 0xfe0b, vcc_hi, null +// GFX12: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_med3_i32 v5, v1, v2, s3 // GFX12: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] @@ -3932,50 +4004,62 @@ v_med3_i32 v5, src_scc, vcc_lo, -1 v_med3_i32 v255, 0xaf123456, vcc_hi, null // GFX12: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_med3_u16 v5, v1, v2, s3 -// GFX12: 
v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +v_med3_u16 v5.l, v1.l, v2.l, s3 +// GFX12: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u16 v5.l, v255.l, s2, s105 +// GFX12: v_med3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u16 v5.l, s1, v255.l, exec_hi +// GFX12: v_med3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u16 v5.l, s105, s105, exec_lo +// GFX12: v_med3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_u16 v5, v255, s2, s105 -// GFX12: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_u16 v5, s1, v255, exec_hi -// GFX12: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, s105, s105, exec_lo -// GFX12: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_u16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_u16 v5.l, m0, 0.5, m0 +// GFX12: v_med3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: 
[0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] -v_med3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_med3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_u16 v5, m0, 0.5, m0 -// GFX12: v_med3_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_u16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_med3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, exec_lo, -1, vcc_hi -// GFX12: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_med3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_med3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: 
[0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_u16 v5.l, v255.h, s2, s105 +// GFX12: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] -v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_u16 v5.l, s1, v255.h, exec_hi +// GFX12: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] -v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX12: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_med3_u32 v5, v1, v2, s3 // GFX12: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] @@ -4112,50 +4196,62 @@ v_min3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_min3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX12: v_min3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x29,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_min3_i16 v5, v1, v2, s3 -// GFX12: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +v_min3_i16 v5.l, v1.l, v2.l, s3 +// GFX12: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i16 v5.l, v255.l, s2, s105 +// GFX12: v_min3_i16 v5.l, v255.l, s2, s105 ; 
encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i16 v5.l, s1, v255.l, exec_hi +// GFX12: v_min3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i16 v5.l, s105, s105, exec_lo +// GFX12: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_i16 v5, v255, s2, s105 -// GFX12: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, s1, v255, exec_hi -// GFX12: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_i16 v5, s105, s105, exec_lo -// GFX12: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_i16 v5.l, m0, 0.5, m0 +// GFX12: v_min3_i16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_i16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_i16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] -v_min3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_min3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_min3_i16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_min3_i16 v5, ttmp15, 
src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +v_min3_i16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_min3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, m0, 0.5, m0 -// GFX12: v_min3_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_i16 v5, exec_lo, -1, vcc_hi -// GFX12: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +v_min3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_min3_i16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_min3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +v_min3_i16 v5.l, v255.h, s2, s105 +// GFX12: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] -v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; 
encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] +v_min3_i16 v5.l, s1, v255.h, exec_hi +// GFX12: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] -v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_min3_i16 v255.h, 0xfe0b, vcc_hi, null +// GFX12: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_min3_i32 v5, v1, v2, s3 // GFX12: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] @@ -4202,50 +4298,62 @@ v_min3_i32 v5, src_scc, vcc_lo, -1 v_min3_i32 v255, 0xaf123456, vcc_hi, null // GFX12: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min3_u16 v5, v1, v2, s3 -// GFX12: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +v_min3_u16 v5.l, v1.l, v2.l, s3 +// GFX12: v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u16 v5.l, v255.l, s2, s105 +// GFX12: v_min3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u16 v5.l, s1, v255.l, exec_hi +// GFX12: v_min3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u16 v5.l, s105, s105, exec_lo +// GFX12: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l +// 
GFX12: v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_u16 v5, v255, s2, s105 -// GFX12: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, s1, v255, exec_hi -// GFX12: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 +// GFX12: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_u16 v5, s105, s105, exec_lo -// GFX12: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_u16 v5.l, m0, 0.5, m0 +// GFX12: v_min3_u16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_u16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_u16 v5.l, exec_lo, -1, vcc_hi +// GFX12: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] -v_min3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_min3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: v_min3_u16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX12: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +v_min3_u16 v5.l, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: v_min3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, m0, 0.5, m0 -// GFX12: v_min3_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_u16 
v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: v_min3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_u16 v5, exec_lo, -1, vcc_hi -// GFX12: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +v_min3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_min3_u16 v5.l, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX12: v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX12: v_min3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX12: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +v_min3_u16 v5.l, v255.h, s2, s105 +// GFX12: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] -v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] +v_min3_u16 v5.l, s1, v255.h, exec_hi +// GFX12: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] -v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX12: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] 
+v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX12: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_min3_u16 v255.h, 0xfe0b, vcc_hi, null +// GFX12: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] v_min3_u32 v5, v1, v2, s3 // GFX12: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 898341956098b..c66c102e4a011 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -2135,53 +2135,68 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] 
-v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_mad_i16_e64_dpp v5.l, 
v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 
; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2279,53 +2294,68 @@ v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 
+v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2519,53 +2549,68 @@ v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, 
-|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, 
v3 quad_perm:[0,1,2,3] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_max3_i16_e64_dpp v5.l, 
v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2615,53 +2660,68 @@ v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 
+v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: 
v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3095,53 +3155,68 @@ v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] 
-v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + 
+v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3191,53 +3266,68 @@ v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] 
+// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: 
v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_med3_i16_e64_dpp v5, v1, 
v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3287,53 +3377,68 @@ 
v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, 
v2.l, v3.l row_half_mirror +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: 
v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3479,53 +3584,68 @@ v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp 
v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_i16_e64_dpp v5, v1, v2, 
exec_hi row_ror:15 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_i16_e64_dpp 
v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3575,53 +3695,68 @@ v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 
vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] +// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] -v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] v_min3_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] // GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5092,20 +5227,20 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] @@ -5113,20 +5248,20 @@ v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc 
op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_mad_u16_e64_dpp v5, v1, v2, -1 
op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] @@ -5149,80 +5284,80 @@ v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_ma v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 
row_mask:0xf bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: 
v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5, v1, v2, null 
op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| 
op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 
-v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX12: v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] @@ -5239,35 +5374,35 @@ v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_ma v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 107c9150532ad..915bc9f9d8f93 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -1242,44 +1242,59 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_lshrrev_b16_e64_dpp 
v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 
+v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: 
v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc0,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1365,44 +1380,59 @@ v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] 
fi:0 +// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc0,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1572,44 +1602,59 @@ v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] // GFX12: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x2a,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 
exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: 
v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1653,44 +1698,59 @@ v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_max3_i32_e64_dpp v255, v255, v255, src_scc 
dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
-v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: 
v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1962,47 +2022,62 @@ v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, 
-|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x32,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX12: v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x32,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x32,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] -// GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x32,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x32,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x32,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2046,44 +2121,59 @@ v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] // GFX12: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x31,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp 
v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 
-v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: 
[0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2127,44 +2217,59 @@ v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_med3_u32_e64_dpp v5, v1, 
v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2292,44 +2397,59 @@ v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] // GFX12: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x29,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
-v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp 
v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, null 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2373,44 +2493,59 @@ v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, 
v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp 
v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] 
fi:1 ; encoding: [0x05,0x10,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3361,20 +3496,20 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] @@ -3382,20 +3517,20 @@ v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] @@ -3418,80 +3553,80 @@ v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2, v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 
; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi 
op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc 
op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x2b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] @@ -3508,35 +3643,35 @@ v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2, v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] 
; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 34e0af0cc1f49..39115203e47f5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -2895,49 +2895,118 @@ # GFX11: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_i16 v5, v255, s2, s105 
op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h 
op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_mad_i16 v5, 
exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_mad_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03 +# 
W32-REAL16: v_mad_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_mad_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_mad_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_mad_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x53,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x53,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_mad_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_mad_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_mad_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_mad_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_mad_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x53,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] 
clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_i16 v255, 0xfe0b, vcc_hi, 
null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 # GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] @@ -3089,49 +3158,118 @@ # GFX11: v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] 0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_u16 v5, v255, s2, 
s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] 
-0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: 
[0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_mad_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03 +# 
W32-REAL16: v_mad_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_mad_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_mad_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_mad_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x41,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x41,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_mad_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_mad_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_mad_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_mad_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_mad_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x41,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v255.h, 
0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, 
v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 # GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] @@ -3283,53 +3421,125 @@ # GFX11: v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_max3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_max3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_max3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_max3_f16 v5.l, v1.l, v2.l, s3 ; 
encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_max3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_max3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_max3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +# 
W32-FAKE16: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_max3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_max3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_max3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_max3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: 
v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_max3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_max3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_max3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_max3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_max3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_max3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] - -0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] - -0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] - -0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_max3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_max3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: 
[0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] + +0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3 +# W32-REAL16: v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_max3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] + +0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43 +# W32-REAL16: v_max3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_max3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_max3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_max3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] + +0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23 +# W32-REAL16: v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_max3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: 
[0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4c,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] # CHECK: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] 0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09 -# GFX11: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +# W32-REAL16: v_max3_f16 v5.l, v255.l, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +# W32-FAKE16: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +# W64-REAL16: v_max3_f16 v5.l, v255.l, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] +# W64-FAKE16: v_max3_f16 v5, v255, s2, s105 mul:2 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09] + +0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] 
; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4c,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: 
[0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] @@ -3377,49 +3587,118 @@ # GFX11: v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_max3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_max3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_i16 v5.l, s1, 
v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
+# W64-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_max3_i16 v5, exec_hi, null, 
vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_max3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_max3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_max3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_max3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_max3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x4d,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_max3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, 0x3800, m0, 0x3800 ; encoding: 
[0x05,0x00,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_max3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_max3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_max3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_max3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_i16 v5.l, v255.h, s2, 
s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null 
op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] @@ -3467,49 +3746,118 @@ # GFX11: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] + 
+0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_max3_u16 v5, exec_lo, -1, vcc_hi 
; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_max3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_max3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_max3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_max3_u16 v5, -1, exec_hi, src_scc ; encoding: 
[0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_max3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_max3_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x4e,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_max3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_max3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_max3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_max3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_max3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null 
op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_max3_u16 
v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] @@ -4079,53 +4427,125 @@ # GFX11: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_f16 v5, v255, s2, s105 
op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] 
-0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_med3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_med3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_med3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_med3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] 
0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] - -0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] - -0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] - -0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_med3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_med3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] + +0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: 
[0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3 +# W32-REAL16: v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_med3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] + +0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43 +# W32-REAL16: v_med3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_med3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_med3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_med3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] + +0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23 +# W32-REAL16: v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_med3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_med3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_med3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x4f,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; 
encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +# CHECK: v_med3_f16 v5.l, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b +# W32-REAL16: v_med3_f16 v5.l, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +# W32-FAKE16: v_med3_f16 v5, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +# W64-REAL16: v_med3_f16 v5.l, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +# W64-FAKE16: v_med3_f16 v5, 0.5, -m0, 0.5 div:2 ; encoding: [0x05,0x00,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] + +0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4f,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_f16 v5, s1, v255, exec_hi 
op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -# CHECK: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] -0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b -# GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2 ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b] +0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_med3_f32 v5, v1, v2, s3 ; 
encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] @@ -4173,49 +4593,118 @@ # GFX11: v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_i16 v5, s1, v255, exec_hi 
op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: 
v_med3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_med3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_med3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_med3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_med3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_med3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x50,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x50,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_med3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + 
+0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_med3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_med3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_med3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_med3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x50,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_i16 v5, v255, 
s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 
v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] @@ -4263,49 +4752,118 @@ # GFX11: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: 
[0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_med3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_med3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: 
v_med3_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_med3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_med3_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x51,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x51,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_med3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_med3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_med3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_med3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_med3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x51,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v255, 0xfe0b, 
vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: 
v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] @@ -4353,53 +4911,125 @@ # GFX11: v_med3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_min3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_min3_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: 
[0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_min3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_min3_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_min3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_min3_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
+0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_min3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_min3_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_min3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_min3_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_min3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: 
[0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_min3_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] - -0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] - -0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] - -0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_min3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_min3_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] + +0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_f16 
v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3 +# W32-REAL16: v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_min3_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] + +0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43 +# W32-REAL16: v_min3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_min3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_min3_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_min3_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x49,0xd6,0xf0,0xfa,0xc0,0x43] + +0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23 +# W32-REAL16: v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_min3_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x49,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: 
[0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] # CHECK: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] 0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11 -# GFX11: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +# W32-REAL16: v_min3_f16 v5.l, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +# W32-FAKE16: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +# W64-REAL16: v_min3_f16 v5.l, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] +# W64-FAKE16: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4 ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11] + +0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x49,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] @@ -4447,49 +5077,118 @@ # GFX11: v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_i16 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] 
+# W32-FAKE16: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; 
encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] - -0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_min3_i16 v5.l, 
exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_min3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_min3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_min3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_min3_i16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_min3_i16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x4a,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_min3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + 
+0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: v_min3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_min3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_min3_i16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_min3_i16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01 +# 
W32-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_min3_i32 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] @@ -4537,49 +5236,118 @@ # GFX11: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] - -0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] - -0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_min3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: 
[0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_min3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, null, exec_lo, 0xfe0b ; encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03 +# W32-REAL16: v_min3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_min3_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_min3_u16 v5.l, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_min3_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] + +0x05,0x00,0x4b,0xd6,0xf0,0xfa,0xc0,0x03 +# W32-REAL16: v_min3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, 0x3800, m0, 0x3800 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] + +0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03 +# W32-REAL16: 
v_min3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_min3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_min3_u16 v5.l, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_min3_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] - -0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: 
[0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null 
op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 8f6e5d3b45fcc..693d56ff3a890 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -4281,46 +4281,118 @@ # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, 
v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# 
W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: 
v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4365,46 +4437,118 @@ # GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 
+# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc 
op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + 
+0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 
; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4448,383 +4592,1031 @@ 0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # GFX11: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, 
v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, 
-1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# 
W32-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# 
W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, v255 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_max3_i16_e64_dpp v255.h, 
v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: 
v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 
0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: 
v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, 
-1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: 
v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x4f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x4f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 
0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + 
+0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, 
v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: 
v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null 
op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_u16_e64_dpp 
v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# 
W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, 
v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: 
v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x49,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, |v1|, -v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX11: v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_f16_e64_dpp 
v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x20,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x49,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x49,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# 
W32-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: 
v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: 
v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, 
v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: 
v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 
+# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, 
vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, 
v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + 
+0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 28267e084f2bf..e6d57af127439 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -2456,44 +2456,113 @@ 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - -0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h 
op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, 
v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_i16_e64_dpp 
v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2537,44 +2606,113 @@ 0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 # GFX11: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
+0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: 
v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, 
src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# 
W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2618,365 +2756,995 @@ 0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 # GFX11: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, 
|v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] - -0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] - -0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] - -0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] - -0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] - -0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX11: v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4c,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05 +# 
W32-REAL16: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 
op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, 
v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4c,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_f16_e64_dpp v5, -|v1|, -|v2|, 
0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4c,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] -0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - -0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 
op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# 
W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] - -0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp 
v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
+# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] 
fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: 
v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] - -0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + 
+0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + 
+0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: 
v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, 
v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] - -0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] - -0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] - -0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] - -0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] - -0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX11: v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, 
-|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x4f,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] - -0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# 
W32-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x4f,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x4f,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] -0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp 
v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: 
v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, 
v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# 
W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - -0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 
0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: 
v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: 
v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - 
-0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, 
v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x01,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX11: 
v_min3_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] - -0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] - -0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] - -0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] - -0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] - -0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX11: v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] + +0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, v2, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x49,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, -|v2|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x49,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + 
+0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, |v1|, -v2, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
+ +0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x49,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x49,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] -0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - -0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: 
v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] 
; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] - -0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: 
v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, 
exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# 
W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] -0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] - -0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# 
W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 
-0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] - -0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] - -0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] - -0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX11: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: 
v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index cb9a7e9f70c9f..ad116022012df 100644 --- 
a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -2866,49 +2866,124 @@ # GFX12: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_mad_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_mad_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_mad_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] 
+# W32-REAL16: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_mad_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_mad_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: 
[0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_mad_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_mad_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_mad_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_mad_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: 
[0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_mad_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_mad_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] 0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# 
W64-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x53,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + 
+0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 # GFX12: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] @@ -3060,49 +3135,124 @@ # GFX12: v_mad_co_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] 0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_mad_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_mad_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_u16 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_mad_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_mad_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_mad_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: 
[0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_mad_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# 
W64-REAL16: v_mad_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_mad_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_mad_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_mad_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_mad_u16 v5.l, -1, exec_hi, src_scc 
op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_mad_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_mad_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] 0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; 
encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_mad_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_mad_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_mad_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_mad_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_mad_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x41,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_mad_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 # GFX12: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] @@ -3344,49 +3494,124 @@ # GFX12: v_max3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x2a,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_max3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_max3_i16 
v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_max3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_max3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_max3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_max3_i16 v5.l, vcc_lo, 
ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_max3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_max3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: 
[0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_max3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_max3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_max3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: 
[0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_max3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# 
W64-REAL16: v_max3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] 0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_i16 v5.l, v255.h, s2, s105 
op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4d,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 
0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] @@ -3434,49 +3659,124 @@ # GFX12: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_max3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_max3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_max3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_max3_u16 v5, s105, s105, exec_lo ; encoding: 
[0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_max3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_max3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_max3_u16 v5, ttmp15, 
src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_max3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_max3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_max3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_max3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_max3_u16 v5, exec_hi, null, vcc_lo ; 
encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_max3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_max3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_max3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] 
; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_max3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] + +0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
+# W64-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_max3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_max3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_max3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_max3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4e,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: 
[0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_max3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] @@ -4010,49 +4310,100 @@ # GFX12: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_med3_num_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_med3_num_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_num_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_num_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_num_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_med3_num_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_med3_num_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_num_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_num_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_num_f16 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_med3_num_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_num_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_num_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_num_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_num_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_med3_num_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_num_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_num_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_num_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_num_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_med3_num_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_num_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_num_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_num_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_num_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_num_f16 v5.l, vcc_hi, 
0xfe0b, v255.l ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX12: v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_med3_num_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_med3_num_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_med3_num_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_med3_num_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_med3_num_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_med3_num_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_med3_num_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_num_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: 
[0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_num_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX12: v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_med3_num_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_med3_num_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_num_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_num_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX12: v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_med3_num_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] +# 
W32-FAKE16: v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_med3_num_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX12: v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_med3_num_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_med3_num_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23 -# GFX12: v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_med3_num_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_med3_num_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null 
op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_num_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_num_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_med3_num_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x00] @@ -4100,49 +4451,124 @@ # GFX12: v_med3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x31,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_i16 v5, v1, 
v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_med3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_i16 v5, vcc_lo, 
ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_med3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, m0, 0x3800, m0 ; encoding: 
[0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_med3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_med3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, null, exec_lo, 0xfe0b 
op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_med3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_med3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: 
[0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] 0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: 
v_med3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x50,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_med3_i32 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] @@ -4190,49 +4616,124 @@ # GFX12: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_med3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_med3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_med3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_med3_u16 v5.l, s105, s105, exec_lo ; 
encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_med3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_med3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_med3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_med3_u16 
v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_med3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_med3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_med3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] 
0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_med3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_med3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: 
[0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_med3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] + +0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# 
W64-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_med3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_med3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_med3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_med3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x51,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: 
[0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_med3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] @@ -4370,49 +4871,124 @@ # GFX12: v_min3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x29,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_min3_i16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_min3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_i16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_i16 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_min3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_i16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_min3_i16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_min3_i16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: 
[0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_min3_i16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: 
[0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_min3_i16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_min3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_min3_i16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] 
+# W64-REAL16: v_min3_i16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W64-REAL16: v_min3_i16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] 0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 
v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_i16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_i16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_i16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_i16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_i16 v5, 
s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_i16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] @@ -4460,49 +5036,124 @@ # GFX12: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: 
v_min3_u16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_min3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_u16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_min3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_u16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_min3_u16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_min3_u16 
v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_min3_u16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01 -# GFX12: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W32-REAL16: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W32-FAKE16: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W64-REAL16: v_min3_u16 v5.l, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +# W64-FAKE16: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] 0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: 
[0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, m0, 0x3800, m0 ; encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] 0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_min3_u16 v5.l, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX12: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-REAL16: v_min3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W32-FAKE16: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-REAL16: v_min3_u16 v5.l, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +# W64-FAKE16: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] 0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.h, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: 
[0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX12: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-REAL16: v_min3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W32-FAKE16: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-REAL16: v_min3_u16 v5.l, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +# W64-FAKE16: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] 0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX12: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-REAL16: v_min3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] 0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03 -# GFX12: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W32-REAL16: v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W32-FAKE16: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# 
W64-REAL16: v_min3_u16 v5.l, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +# W64-FAKE16: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] + +0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_min3_u16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_min3_u16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: 
[0x05,0x08,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_min3_u16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_min3_u16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX12: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_min3_u16 v255.h, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 
0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 64d9181c35808..354bdfb0c24cc 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4626,49 +4626,154 @@ # GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# 
W32-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp 
v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: 
v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4716,49 +4821,154 @@ # GFX12: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# 
W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 
0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4851,229 +5061,724 @@ # GFX12: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: 
v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: 
v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: 
v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, 
v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, 
v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: 
v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: 
v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# 
W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, 
v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: 
v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, 
v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] 0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_num_f16_e64_dpp 
v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: 
v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# 
W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, 
v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: 
v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 
op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, 
v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: 
v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5121,94 +5826,304 @@ # GFX12: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, 
v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 
0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 
+# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, 
vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] 0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] 
0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] 0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] 
0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h 
op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null 
op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null 
op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] 0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-REAL16: 
v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index b64b63663df5a..4b5f7cb2e0526 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2739,46 +2739,148 @@ # GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp 
v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
+# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2826,46 +2928,148 @@ # GFX12: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, 
v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# 
W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: 
v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] 
clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp 
v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2958,217 +3162,700 @@ # GFX12: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: 
v_max3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, 
v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# 
W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x20,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_i16_e64_dpp v255.h, 
v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: 
v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, 
v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, 
exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 
0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] 0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, 
-|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x32,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x32,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x32,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x32,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x32,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x32,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: 
v_med3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, 
v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, 
v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, 
v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_i16_e64_dpp v255.h, 
v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: 
v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3216,88 +3903,292 @@ # GFX12: 
v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: 
v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null 
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x08,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] 0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] 0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05 -# GFX12: 
v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] 0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] 0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 -# GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 
-1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: 
v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: 
v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05 +# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_min3_u16_e64_dpp v5, 
v1, v2, -1 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] 0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] From ad32576cffc88bf7c359a528afbed7c2ae7ddb2d Mon Sep 17 00:00:00 2001 From: alx32 <103613512+alx32@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:00:56 -0800 Subject: [PATCH 06/35] [DWARFVerifier] Allow overlapping ranges for ICF-merged functions (#117952) This patch modifies the DWARF verifier to handle a valid case where two or more functions have identical address ranges due to being merged by ICF (Identical Code Folding). Previously, the verifier would incorrectly report these as errors, but functions merged via ICF (such as when using LLD's --keep-icf-stabs option) can legitimately share the same address range. 
A new test case has been added to verify this behavior using YAML-based DWARF data that simulates two DW_TAG_subprogram entries with identical address ranges. The test ensures that the verifier correctly identifies this as a valid case and doesn't emit any errors, while still maintaining the existing verification for truly invalid overlapping ranges in other scenarios. Before this change, the newly added test case would have failed, with `llvm-dwarfdump` marking the overlapping address ranges in the DWARF as an error. We also modify the existing tests `llvm-dwarfutil/ELF/X86/verify.test` and `llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml` which rely on the existence of the error that we're trying to suppress. We slightly change one offset so that the ranges don't perfectly overlap and an error is still generated. --- .../llvm/DebugInfo/DWARF/DWARFVerifier.h | 10 +- llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp | 12 +- .../X86/verify_no_overlap_error_icf.yaml | 153 ++++++++++++++++++ .../X86/verify_parent_zero_length.yaml | 8 +- .../tools/llvm-dwarfutil/ELF/X86/verify.test | 2 +- 5 files changed, 175 insertions(+), 10 deletions(-) create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index c2365a4c7cf64..7b51bb63cd15b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -68,7 +68,9 @@ class DWARFVerifier { /// Inserts the address range. If the range overlaps with an existing /// range, the range that it overlaps with will be returned and the two - /// address ranges will be unioned together in "Ranges". + /// address ranges will be unioned together in "Ranges". If a duplicate + /// entry is attempted to be added, the duplicate range will not actually be + /// added and std::nullopt will be returned.
/// /// This is used for finding overlapping ranges in the DW_AT_ranges /// attribute of a DIE. It is also used as a set of address ranges that @@ -77,7 +79,9 @@ class DWARFVerifier { /// Inserts the address range info. If any of its ranges overlaps with a /// range in an existing range info, the range info is *not* added and an - /// iterator to the overlapping range info. + /// iterator to the overlapping range info. If a duplicate entry is + /// attempted to be added, the duplicate range will not actually be added + /// and the returned iterator will point to end(). /// /// This is used for finding overlapping children of the same DIE. die_range_info_iterator insert(const DieRangeInfo &RI); @@ -86,7 +90,7 @@ class DWARFVerifier { bool contains(const DieRangeInfo &RHS) const; /// Return true if any range in this object intersects with any range in - /// RHS. + /// RHS. Identical ranges are not considered to be intersecting. bool intersects(const DieRangeInfo &RHS) const; }; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 1fe3eb1e90fe6..8bf513538de7c 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -53,6 +53,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { auto End = Ranges.end(); auto Pos = std::lower_bound(Begin, End, R); + // Check for exact duplicates which is an allowed special case + if (Pos != End && *Pos == R) { + return std::nullopt; + } + if (Pos != End) { DWARFAddressRange Range(*Pos); if (Pos->merge(R)) @@ -113,8 +118,11 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const { auto I1 = Ranges.begin(), E1 = Ranges.end(); auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end(); while (I1 != E1 && I2 != E2) { - if (I1->intersects(*I2)) - return true; + if (I1->intersects(*I2)) { + // Exact duplicates are allowed + if (!(*I1 == *I2)) + return true; + } if (I1->LowPC < I2->LowPC) ++I1; else diff 
--git a/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml new file mode 100644 index 0000000000000..b1ce724ff0b6d --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml @@ -0,0 +1,153 @@ +#--- comments.txt + +# This test verifies several scenarios with DW_TAG_subprogram address ranges: +# 1. Two subprograms can have identical ranges (shown with foo2 and foo3 having same low_pc/high_pc) +# This is valid and can happen when ICF (Identical Code Folding) merges functions. +# 2. Two subprograms can have overlapping ranges when using DW_AT_ranges +# (shown with func1_with_ranges and func2_with_ranges sharing range 0x5000-0x6000) +# This is also valid and can occur with -fbasic-block-sections=all +# 3. The test also verifies that non-identical overlapping ranges are correctly flagged as errors: +# - When modifying just the first range's high offset from 0x6000 to 0x5999, it creates an invalid subrange overlap +# - When modifying just the first instance of DW_AT_high_pc 0x77 to 0x66, it creates an invalid function overlap +# The test ensures llvm-dwarfdump --verify correctly validates these cases by: +# a) Accepting valid identical overlapping ranges +# b) Rejecting invalid non-identical overlapping ranges + +# Need to use split-file in order for `sed` calls below to work correctly +# RUN: split-file %s %t +# RUN: yaml2obj %t/test.yaml | llvm-dwarfdump --error-display=details --verify - | FileCheck %s +# CHECK: No errors. 
+ +# RUN: sed '0,/HighOffset: 0x6000/{s//HighOffset: 0x5999/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-RANGES +# CHECK-RANGES: error: DIEs have overlapping address ranges + +# RUN: sed '0,/Value: 0x77/{s/Value: 0x77/Value: 0x66/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-HIGH-PC +# CHECK-HIGH-PC: error: DIEs have overlapping address ranges + +# RUN: sed '0,/LowOffset: 0x880111/{s//LowOffset: 0x880112/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-LEX-BLOCK +# CHECK-LEX-BLOCK: DIE has overlapping ranges in DW_AT_ranges attribute + +#--- test.yaml +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +DWARF: + debug_abbrev: + - Table: + - Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_producer + Form: DW_FORM_string + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Attribute: DW_AT_name + Form: DW_FORM_string + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data8 + - Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Attribute: DW_AT_low_pc + Form: DW_FORM_addr + - Attribute: DW_AT_high_pc + Form: DW_FORM_data8 + - Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Attribute: DW_AT_ranges + Form: DW_FORM_sec_offset + - Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Tag: DW_TAG_lexical_block + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_ranges + Form: DW_FORM_sec_offset + debug_ranges: + - Offset: 0x0 + AddrSize: 0x8 + Entries: + - LowOffset: 0x1000 + HighOffset: 0x2000 + - LowOffset: 0x3000 + 
HighOffset: 0x4000 + - LowOffset: 0x5000 # Overlaps with 2nd range below + HighOffset: 0x6000 + - LowOffset: 0x0 + HighOffset: 0x0 + - Offset: 0x50 + AddrSize: 0x8 + Entries: + - LowOffset: 0x2500 + HighOffset: 0x2800 + - LowOffset: 0x5000 # Overlaps with 3rd range above + HighOffset: 0x6000 + - LowOffset: 0x7000 + HighOffset: 0x8000 + - LowOffset: 0x0 + HighOffset: 0x0 + - Offset: 0xA0 # Added Range List #3 for lexical block + AddrSize: 0x8 + Entries: + - LowOffset: 0x880111 + HighOffset: 0x881222 + - LowOffset: 0x882333 + HighOffset: 0x883444 + - LowOffset: 0x880111 # Overlaps with 1st range in the same list + HighOffset: 0x881222 + - LowOffset: 0x0 # End of list + HighOffset: 0x0 + debug_info: + - Version: 4 + Entries: + - AbbrCode: 1 + Values: + - CStr: by_hand + - Value: 0x04 + - CStr: CU1 + - Value: 0x1000 + - Value: 0x100 + - AbbrCode: 4 + Values: + - CStr: int + - AbbrCode: 2 + Values: + - CStr: foo1 + - Value: 0x1000 + - Value: 0x10 + - AbbrCode: 2 + Values: + - CStr: foo2 + - Value: 0x0 # Overlaps with 'foo3' below + - Value: 0x77 + - AbbrCode: 2 + Values: + - CStr: foo3 + - Value: 0x0 # Overlaps with 'foo2' above + - Value: 0x77 + - AbbrCode: 3 + Values: + - CStr: func1_with_ranges + - Value: 0x0 + - AbbrCode: 3 + Values: + - CStr: func2_with_ranges + - Value: 0x50 + - AbbrCode: 5 # Added lexical block using ranges + Values: + - Value: 0xA0 # Range list index in debug_ranges + - AbbrCode: 0 +... 
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml index 655819515f0ff..8eb7a349b9b80 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml +++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml @@ -19,7 +19,7 @@ # DW_AT_high_pc (0x0000000000000000) # # 0x00000033: DW_TAG_lexical_block -# DW_AT_low_pc (0x0000000000001000) +# DW_AT_low_pc (0x0000000000001001) # DW_AT_high_pc (0x0000000000002000) # # 0x00000044: DW_TAG_lexical_block @@ -47,7 +47,7 @@ # CHECK: error: DIEs have overlapping address ranges: # CHECK: 0x00000044: DW_TAG_lexical_block -# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000001000) +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000001001) # CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000002000) # CHECK: 0x00000033: DW_TAG_lexical_block @@ -61,7 +61,7 @@ # CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000000) # CHECK: 0x00000044: DW_TAG_lexical_block -# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000001000) +# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000001001) # CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000002000) @@ -229,7 +229,7 @@ DWARF: - Value: 0x0000000000002000 - AbbrCode: 0x00000003 Values: - - Value: 0x0000000000001000 + - Value: 0x0000000000001001 - Value: 0x0000000000002000 - AbbrCode: 0x00000000 - AbbrCode: 0x00000000 diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test b/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test index bf736937782c8..73ee11f46d10e 100644 --- a/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test +++ b/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test @@ -150,7 +150,7 @@ DWARF: Values: - CStr: foo3 - Value: 0x0 - - Value: 0x100 + - Value: 0x80 - Value: 0x00000040 - AbbrCode: 0 ... 
From 56cb55429199435a78f6e836f52cf41577406e90 Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaffran@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:10:05 -0800 Subject: [PATCH 07/35] [NFC] Updating Debug Info generation for 'this' (#119445) This PR updates the debug info generation for `this`. This is required to fix the generation of debug information for HLSL RWBuffer type. This was required from another PR: https://github.com/llvm/llvm-project/pull/119041/files Co-authored-by: Joao Saffran --- clang/lib/CodeGen/CGDebugInfo.cpp | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ff27690d47b08..f29ddece5dbc9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2021,28 +2021,10 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( // ThisPtr may be null if the member function has an explicit 'this' // parameter. if (!ThisPtr.isNull()) { - const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl(); - if (isa(RD)) { - // Create pointer type directly in this case. - const PointerType *ThisPtrTy = cast(ThisPtr); - uint64_t Size = CGM.getContext().getTypeSize(ThisPtrTy); - auto Align = getTypeAlignIfRequired(ThisPtrTy, CGM.getContext()); - llvm::DIType *PointeeType = - getOrCreateType(ThisPtrTy->getPointeeType(), Unit); - llvm::DIType *ThisPtrType = - DBuilder.createPointerType(PointeeType, Size, Align); - TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); - // TODO: This and the artificial type below are misleading, the - // types aren't artificial the argument is, but the current - // metadata doesn't represent that.
- ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); - Elts.push_back(ThisPtrType); - } else { - llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit); - TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); - ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); - Elts.push_back(ThisPtrType); - } + llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit); + TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); + ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); + Elts.push_back(ThisPtrType); } // Copy rest of the arguments. From 83643ddf2f53d269f2350510c11a02704b333393 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 17 Dec 2024 11:25:04 -0800 Subject: [PATCH 08/35] [lldb] Improve error reporting in GetLocation_DW_OP_addr (#120162) Instead of simply raising an error flag, use an llvm::Expected to propagate a meaningful error to the caller, who can report it. rdar://139705570 --- .../include/lldb/Expression/DWARFExpression.h | 11 +++----- lldb/source/Expression/DWARFExpression.cpp | 22 ++++++++------- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 28 +++++++++++-------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index e85ba464dea6b..2c1e717ee32eb 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -16,6 +16,7 @@ #include "lldb/Utility/Status.h" #include "lldb/lldb-private.h" #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" +#include "llvm/Support/Error.h" #include namespace lldb_private { @@ -61,15 +62,11 @@ class DWARFExpression { /// The dwarf unit this expression belongs to. Only required to resolve /// DW_OP{addrx, GNU_addr_index}. /// - /// \param[out] error - /// If the location stream contains unknown DW_OP opcodes or the - /// data is missing, \a error will be set to \b true. 
- /// /// \return /// The address specified by the operation, if the operation exists, or - /// LLDB_INVALID_ADDRESS otherwise. - lldb::addr_t GetLocation_DW_OP_addr(const plugin::dwarf::DWARFUnit *dwarf_cu, - bool &error) const; + /// an llvm::Error otherwise. + llvm::Expected + GetLocation_DW_OP_addr(const plugin::dwarf::DWARFUnit *dwarf_cu) const; bool Update_DW_OP_addr(const plugin::dwarf::DWARFUnit *dwarf_cu, lldb::addr_t file_addr); diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index a7126b25c1cc3..1d826e341e2c4 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -343,30 +343,32 @@ static lldb::offset_t GetOpcodeDataSize(const DataExtractor &data, } } -lldb::addr_t DWARFExpression::GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu, - bool &error) const { - error = false; +llvm::Expected +DWARFExpression::GetLocation_DW_OP_addr(const DWARFUnit *dwarf_cu) const { lldb::offset_t offset = 0; while (m_data.ValidOffset(offset)) { const uint8_t op = m_data.GetU8(&offset); if (op == DW_OP_addr) return m_data.GetAddress(&offset); + if (op == DW_OP_GNU_addr_index || op == DW_OP_addrx) { - uint64_t index = m_data.GetULEB128(&offset); + const uint64_t index = m_data.GetULEB128(&offset); if (dwarf_cu) return dwarf_cu->ReadAddressFromDebugAddrSection(index); - error = true; - break; + return llvm::createStringError("cannot evaluate %s without a DWARF unit", + DW_OP_value_to_name(op)); } + const lldb::offset_t op_arg_size = GetOpcodeDataSize(m_data, offset, op, dwarf_cu); - if (op_arg_size == LLDB_INVALID_OFFSET) { - error = true; - break; - } + if (op_arg_size == LLDB_INVALID_OFFSET) + return llvm::createStringError("cannot get opcode data size for %s", + DW_OP_value_to_name(op)); + offset += op_arg_size; } + return LLDB_INVALID_ADDRESS; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp 
b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 000776d746706..87517266fced5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/Threading.h" #include "lldb/Core/Module.h" @@ -1704,7 +1705,7 @@ SymbolFileDWARF *SymbolFileDWARF::GetDIERefSymbolFile(const DIERef &die_ref) { // We have a SymbolFileDWARFDebugMap, so let it find the right file if (SymbolFileDWARFDebugMap *debug_map = GetDebugMapSymfile()) return debug_map->GetSymbolFileByOSOIndex(*file_index); - + // Handle the .dwp file case correctly if (*file_index == DIERef::k_file_index_mask) return GetDwpSymbolFile().get(); // DWP case @@ -3506,17 +3507,20 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, // Check if the location has a DW_OP_addr with any address value... 
lldb::addr_t location_DW_OP_addr = LLDB_INVALID_ADDRESS; if (!location_is_const_value_data) { - bool op_error = false; - const DWARFExpression* location = location_list.GetAlwaysValidExpr(); - if (location) - location_DW_OP_addr = - location->GetLocation_DW_OP_addr(location_form.GetUnit(), op_error); - if (op_error) { - StreamString strm; - location->DumpLocation(&strm, eDescriptionLevelFull, nullptr); - GetObjectFile()->GetModule()->ReportError( - "{0:x16}: {1} ({2}) has an invalid location: {3}", die.GetOffset(), - DW_TAG_value_to_name(die.Tag()), die.Tag(), strm.GetData()); + if (const DWARFExpression *location = + location_list.GetAlwaysValidExpr()) { + if (auto maybe_location_DW_OP_addr = + location->GetLocation_DW_OP_addr(location_form.GetUnit())) { + location_DW_OP_addr = *maybe_location_DW_OP_addr; + } else { + StreamString strm; + location->DumpLocation(&strm, eDescriptionLevelFull, nullptr); + GetObjectFile()->GetModule()->ReportError( + "{0:x16}: {1} ({2}) has an invalid location: {3}: {4}", + die.GetOffset(), DW_TAG_value_to_name(die.Tag()), die.Tag(), + llvm::fmt_consume(maybe_location_DW_OP_addr.takeError()), + strm.GetData()); + } } if (location_DW_OP_addr != LLDB_INVALID_ADDRESS) is_static_lifetime = true; From e5521fae944c1f3f6905ce5902819a5c9be7f802 Mon Sep 17 00:00:00 2001 From: Malte Dehling Date: Tue, 17 Dec 2024 11:28:00 -0800 Subject: [PATCH 09/35] [mlir-tblgen] Fix bug in emitEnumDoc (#118131) Fixes a crash (assertion failure) in `mlir-tblgen -emit-enum-doc` caused by calling `EnumAttr()` for the wrong type of `Record *`: `EnumAttr` rather than `EnumAttrInfo` as asserted. 
Compare the corresponding line in `emitDialectDoc()`: https://github.com/llvm/llvm-project/blob/0ad6be1927f89cef09aa5d0fb244873f687997c9/mlir/tools/mlir-tblgen/OpDocGen.cpp#L532 Co-authored-by: Malte Dehling --- mlir/tools/mlir-tblgen/OpDocGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/tools/mlir-tblgen/OpDocGen.cpp b/mlir/tools/mlir-tblgen/OpDocGen.cpp index d499c78a5cf44..1c394f5680a5c 100644 --- a/mlir/tools/mlir-tblgen/OpDocGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDocGen.cpp @@ -409,7 +409,7 @@ static void emitEnumDoc(const EnumAttr &def, raw_ostream &os) { static void emitEnumDoc(const RecordKeeper &records, raw_ostream &os) { os << "\n"; - for (const Record *def : records.getAllDerivedDefinitions("EnumAttr")) + for (const Record *def : records.getAllDerivedDefinitions("EnumAttrInfo")) emitEnumDoc(EnumAttr(def), os); } From ec636cf3c5048039bd3c52b1ebdb66dabcd273fe Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Tue, 17 Dec 2024 19:29:18 +0000 Subject: [PATCH 10/35] [llvm-split][nfc] Harmonize help and error message (#120062) Some error / help messages refer to options with a single dash while the help refers to options with a double dash. --- llvm/test/tools/llvm-split/target-specific-split.ll | 2 +- llvm/tools/llvm-split/llvm-split.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-split/target-specific-split.ll b/llvm/test/tools/llvm-split/target-specific-split.ll index f368a563d977b..030fc0126646c 100644 --- a/llvm/test/tools/llvm-split/target-specific-split.ll +++ b/llvm/test/tools/llvm-split/target-specific-split.ll @@ -4,7 +4,7 @@ ; Basic test for a target that doesn't support target-specific module splitting.
-; CHECK: warning: -preserve-locals has no effect when using TargetMachine::splitModule +; CHECK: warning: --preserve-locals has no effect when using TargetMachine::splitModule ; CHECK: warning: TargetMachine::splitModule failed, falling back to default splitModule implementation define void @bar() { diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp index c456403e6bc68..1b1f97f44e274 100644 --- a/llvm/tools/llvm-split/llvm-split.cpp +++ b/llvm/tools/llvm-split/llvm-split.cpp @@ -67,7 +67,7 @@ static cl::opt cl::value_desc("triple"), cl::cat(SplitCategory)); static cl::opt - MCPU("mcpu", cl::desc("Target CPU, ignored if -mtriple is not used"), + MCPU("mcpu", cl::desc("Target CPU, ignored if --mtriple is not used"), cl::value_desc("cpu"), cl::cat(SplitCategory)); int main(int argc, char **argv) { @@ -125,11 +125,11 @@ int main(int argc, char **argv) { if (TM) { if (PreserveLocals) { - errs() << "warning: -preserve-locals has no effect when using " + errs() << "warning: --preserve-locals has no effect when using " "TargetMachine::splitModule\n"; } if (RoundRobin) - errs() << "warning: -round-robin has no effect when using " + errs() << "warning: --round-robin has no effect when using " "TargetMachine::splitModule\n"; if (TM->splitModule(*M, NumOutputs, HandleModulePart)) From c9a5a6d18bd71b203798b9188f565bdf173ad91b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Dec 2024 14:28:28 -0500 Subject: [PATCH 11/35] [lld/COFF] Remove unused InputFile::LazyObjectKind Its use was removed in d496abbe2a037. No behavior change. 
--- lld/COFF/InputFiles.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 956e4dd8bc4cf..d3075c5e0a338 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -72,7 +72,6 @@ class InputFile { enum Kind { ArchiveKind, ObjectKind, - LazyObjectKind, PDBKind, ImportKind, BitcodeKind, From 0e11e194167ff4e4959f0b908b9de5d3f5f801f5 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 17 Dec 2024 11:13:20 -0800 Subject: [PATCH 12/35] [SLP][NFC]Remove undef and update tests --- .../X86/revectorized_rdx_crash.ll | 42 +++++++++---------- .../SLPVectorizer/X86/undef_vect.ll | 40 +++++++++--------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll index 364b0f4c1a3a7..2a3029bf325ca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll @@ -12,67 +12,67 @@ ; iteration (it was matched and vectorized, which added a use of a deleted ; instruction) -define void @test(i1 %arg) { +define void @test(i1 %arg, ptr %p) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 %arg, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; CHECK: for.cond.preheader: -; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2 -; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 3 +; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 2 +; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[I]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]]) -; CHECK-NEXT: [[OP_RDX3:%.*]] = add i32 [[TMP1]], undef +; CHECK-NEXT: [[OP_RDX3:%.*]] = add i32 [[TMP1]], 0 ; CHECK-NEXT: 
[[TMP2:%.*]] = load <4 x i32>, ptr [[I1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) -; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP3]], undef +; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[OP_RDX3]], 2 -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 undef, [[TMP4]] +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 0, [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[OP_RDX2]], 2 ; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[TMP5]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[OP_RDX1]], [[FOR_COND_PREHEADER]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[OP_RDX1]], [[FOR_COND_PREHEADER]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret void ; entry: br i1 %arg, label %if.end, label %for.cond.preheader for.cond.preheader: ; preds = %entry - %i = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 2 - %i1 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 3 - %i2 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 4 - %i3 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 5 - %i4 = getelementptr inbounds [100 x i32], ptr undef, i64 0, i64 6 + %i = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 2 + %i1 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 3 + %i2 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 4 + %i3 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 5 + %i4 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 6 %ld0 = load i32, ptr %i, align 8 %ld1 = load i32, ptr %i1, align 4 %ld2 = load i32, ptr %i2, align 16 %ld3 = load i32, ptr %i3, align 4 - %i5 = add i32 undef, undef + %i5 = add i32 0, 0 %i6 = add i32 %i5, %ld3 %i7 = add i32 %i6, %ld2 %i8 = add i32 %i7, %ld1 %i9 = add i32 %i8, %ld0 - %i10 = add i32 %i9, undef + %i10 = add i32 %i9, 0 %i11 = add i32 %i9, %i10 %ld4 = load i32, ptr %i1, align 4 %ld5 = load i32, ptr %i2, align 16 %ld6 = load 
i32, ptr %i3, align 4 %ld7 = load i32, ptr %i4, align 8 - %i12 = add i32 undef, undef + %i12 = add i32 0, 0 %i13 = add i32 %i12, %ld7 %i14 = add i32 %i13, %ld6 %i15 = add i32 %i14, %ld5 %i16 = add i32 %i15, %ld4 - %i17 = add i32 %i16, undef + %i17 = add i32 %i16, 0 %i18 = add i32 %i17, %i11 %i19 = add i32 %i17, %i18 - %i20 = add i32 undef, %i19 - %i21 = add i32 undef, %i20 - %i22 = add i32 undef, %i21 - %i23 = add i32 undef, %i22 + %i20 = add i32 0, %i19 + %i21 = add i32 0, %i20 + %i22 = add i32 0, %i21 + %i23 = add i32 0, %i22 br label %if.end if.end: ; preds = %for.cond.preheader, %entry - %r = phi i32 [ %i23, %for.cond.preheader ], [ undef, %entry ] + %r = phi i32 [ %i23, %for.cond.preheader ], [ 0, %entry ] ret void } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll index 431f95dd0831c..a552a24eb7b26 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll @@ -3,55 +3,55 @@ %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76" = type { i32, i32 } -define void @_Z2azv() local_unnamed_addr { +define void @_Z2azv(ptr %p) local_unnamed_addr { ; CHECK-LABEL: @_Z2azv( ; CHECK-NEXT: for.body.lr.ph: -; CHECK-NEXT: [[DOTSROA_CAST_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 0 +; CHECK-NEXT: [[DOTSROA_CAST_4:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr [[P:%.*]], i64 4, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[DOTSROA_CAST_4]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP2]], undef -; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP2]], i32 undef -; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_RDX1]] -; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32 
[[DOTSROA_SPECULATED_9]], undef +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP2]], 0 +; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP2]], i32 0 +; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 false, i32 0, i32 [[OP_RDX1]] +; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], 0 ; CHECK-NEXT: ret void ; for.body.lr.ph: - %.sroa_cast.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 0 + %.sroa_cast.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 4, i32 0 %retval.sroa.0.0.copyload.i5.4 = load i32, ptr %.sroa_cast.4, align 4 - %.sroa_raw_idx.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 4, i32 1 + %.sroa_raw_idx.4 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 4, i32 1 %retval.sroa.0.0.copyload.i7.4 = load i32, ptr %.sroa_raw_idx.4, align 4 %cmp.i2.4 = icmp slt i32 %retval.sroa.0.0.copyload.i5.4, %retval.sroa.0.0.copyload.i7.4 %0 = select i1 %cmp.i2.4, i32 %retval.sroa.0.0.copyload.i7.4, i32 %retval.sroa.0.0.copyload.i5.4 - %cmp.i1.4 = icmp slt i32 undef, %0 - %.sroa.speculated.4 = select i1 %cmp.i1.4, i32 %0, i32 undef - %.sroa_cast.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 5, i32 0 + %cmp.i1.4 = icmp slt i32 0, %0 + %.sroa.speculated.4 = select i1 %cmp.i1.4, i32 %0, i32 0 + %.sroa_cast.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 5, i32 0 %retval.sroa.0.0.copyload.i5.5 = load i32, ptr %.sroa_cast.5, align 4 - %.sroa_raw_idx.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 5, i32 1 + %.sroa_raw_idx.5 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 5, i32 1 %retval.sroa.0.0.copyload.i7.5 = load i32, ptr %.sroa_raw_idx.5, align 4 %cmp.i2.5 = icmp 
slt i32 %retval.sroa.0.0.copyload.i5.5, %retval.sroa.0.0.copyload.i7.5 %1 = select i1 %cmp.i2.5, i32 %retval.sroa.0.0.copyload.i7.5, i32 %retval.sroa.0.0.copyload.i5.5 %cmp.i1.5 = icmp slt i32 %.sroa.speculated.4, %1 %.sroa.speculated.5 = select i1 %cmp.i1.5, i32 %1, i32 %.sroa.speculated.4 - %.sroa_cast.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 6, i32 0 + %.sroa_cast.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 6, i32 0 %retval.sroa.0.0.copyload.i5.6 = load i32, ptr %.sroa_cast.6, align 4 - %.sroa_raw_idx.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 6, i32 1 + %.sroa_raw_idx.6 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 6, i32 1 %retval.sroa.0.0.copyload.i7.6 = load i32, ptr %.sroa_raw_idx.6, align 4 %cmp.i2.6 = icmp slt i32 %retval.sroa.0.0.copyload.i5.6, %retval.sroa.0.0.copyload.i7.6 %2 = select i1 %cmp.i2.6, i32 %retval.sroa.0.0.copyload.i7.6, i32 %retval.sroa.0.0.copyload.i5.6 %cmp.i1.6 = icmp slt i32 %.sroa.speculated.5, %2 %.sroa.speculated.6 = select i1 %cmp.i1.6, i32 %2, i32 %.sroa.speculated.5 - %.sroa_cast.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 7, i32 0 + %.sroa_cast.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 7, i32 0 %retval.sroa.0.0.copyload.i5.7 = load i32, ptr %.sroa_cast.7, align 4 - %.sroa_raw_idx.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr undef, i64 7, i32 1 + %.sroa_raw_idx.7 = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", ptr %p, i64 7, i32 1 %retval.sroa.0.0.copyload.i7.7 = load i32, ptr %.sroa_raw_idx.7, align 4 %cmp.i2.7 = icmp slt i32 %retval.sroa.0.0.copyload.i5.7, %retval.sroa.0.0.copyload.i7.7 %3 = select i1 %cmp.i2.7, i32 %retval.sroa.0.0.copyload.i7.7, i32 
%retval.sroa.0.0.copyload.i5.7 %cmp.i1.7 = icmp slt i32 %.sroa.speculated.6, %3 %.sroa.speculated.7 = select i1 %cmp.i1.7, i32 %3, i32 %.sroa.speculated.6 - %cmp.i1.8 = icmp slt i32 %.sroa.speculated.7, undef - %.sroa.speculated.8 = select i1 %cmp.i1.8, i32 undef, i32 %.sroa.speculated.7 - %.sroa.speculated.9 = select i1 undef, i32 undef, i32 %.sroa.speculated.8 - %cmp.i1.10 = icmp slt i32 %.sroa.speculated.9, undef + %cmp.i1.8 = icmp slt i32 %.sroa.speculated.7, 0 + %.sroa.speculated.8 = select i1 %cmp.i1.8, i32 0, i32 %.sroa.speculated.7 + %.sroa.speculated.9 = select i1 0, i32 0, i32 %.sroa.speculated.8 + %cmp.i1.10 = icmp slt i32 %.sroa.speculated.9, 0 ret void } From b2c363e2616dc6ac6ee76c223d84ec512e118d5d Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 17 Dec 2024 12:10:29 -0800 Subject: [PATCH 13/35] =?UTF-8?q?[flang]=20Fix=20generic=20resolution=20wi?= =?UTF-8?q?th=20actual/dummy=20procedure=20incompatib=E2=80=A6=20(#120105)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ility We generally allow any legal procedure pointer target as an actual argument for association with a dummy procedure, since many actual procedures are underspecified EXTERNALs. But for proper generic resolution, it is necessary to disallow incompatible functions with explicit result types. Fixes https://github.com/llvm/llvm-project/issues/119151. --- flang/lib/Semantics/expression.cpp | 35 ++++++++++++++++++++++++------ flang/test/Semantics/generic11.f90 | 25 +++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 flang/test/Semantics/generic11.f90 diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index b9be586f4d772..0dbd6eaff40e3 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2489,7 +2489,8 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( // Can actual be argument associated with dummy? 
static bool CheckCompatibleArgument(bool isElemental, - const ActualArgument &actual, const characteristics::DummyArgument &dummy) { + const ActualArgument &actual, const characteristics::DummyArgument &dummy, + FoldingContext &foldingContext) { const auto *expr{actual.UnwrapExpr()}; return common::visit( common::visitors{ @@ -2509,8 +2510,26 @@ static bool CheckCompatibleArgument(bool isElemental, } return false; }, - [&](const characteristics::DummyProcedure &) { - return expr && IsProcedurePointerTarget(*expr); + [&](const characteristics::DummyProcedure &dummy) { + if (!expr || !IsProcedurePointerTarget(*expr)) { + return false; + } + if (auto actualProc{characteristics::Procedure::Characterize( + *expr, foldingContext)}) { + const auto &dummyResult{dummy.procedure.value().functionResult}; + const auto *dummyTypeAndShape{ + dummyResult ? dummyResult->GetTypeAndShape() : nullptr}; + const auto &actualResult{actualProc->functionResult}; + const auto *actualTypeAndShape{ + actualResult ? actualResult->GetTypeAndShape() : nullptr}; + if (dummyTypeAndShape && actualTypeAndShape) { + // Return false when the function results' types are both + // known and not compatible. + return actualTypeAndShape->type().IsTkCompatibleWith( + dummyTypeAndShape->type()); + } + } + return true; }, [&](const characteristics::AlternateReturn &) { return actual.isAlternateReturn(); @@ -2521,15 +2540,16 @@ static bool CheckCompatibleArgument(bool isElemental, // Are the actual arguments compatible with the dummy arguments of procedure? 
static bool CheckCompatibleArguments( - const characteristics::Procedure &procedure, - const ActualArguments &actuals) { + const characteristics::Procedure &procedure, const ActualArguments &actuals, + FoldingContext &foldingContext) { bool isElemental{procedure.IsElemental()}; const auto &dummies{procedure.dummyArguments}; CHECK(dummies.size() == actuals.size()); for (std::size_t i{0}; i < dummies.size(); ++i) { const characteristics::DummyArgument &dummy{dummies[i]}; const std::optional &actual{actuals[i]}; - if (actual && !CheckCompatibleArgument(isElemental, *actual, dummy)) { + if (actual && + !CheckCompatibleArgument(isElemental, *actual, dummy, foldingContext)) { return false; } } @@ -2726,7 +2746,8 @@ std::pair ExpressionAnalyzer::ResolveGeneric( } if (semantics::CheckInterfaceForGeneric(*procedure, localActuals, context_, false /* no integer conversions */) && - CheckCompatibleArguments(*procedure, localActuals)) { + CheckCompatibleArguments( + *procedure, localActuals, foldingContext_)) { if ((procedure->IsElemental() && elemental) || (!procedure->IsElemental() && nonElemental)) { int d{ComputeCudaMatchingDistance( diff --git a/flang/test/Semantics/generic11.f90 b/flang/test/Semantics/generic11.f90 new file mode 100644 index 0000000000000..14383ab150fe4 --- /dev/null +++ b/flang/test/Semantics/generic11.f90 @@ -0,0 +1,25 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! 
Regression test for bug #119151 +interface sub + subroutine sub1(ifun) + interface + integer function ifun() + end + end interface + end + subroutine sub2(rfun) + real rfun + external rfun + end +end interface +integer ifun +real rfun +complex zfun +external ifun, rfun, zfun, xfun +call sub(ifun) +call sub(rfun) +!ERROR: No specific subroutine of generic 'sub' matches the actual arguments +call sub(zfun) +!ERROR: The actual arguments to the generic procedure 'sub' matched multiple specific procedures, perhaps due to use of NULL() without MOLD= or an actual procedure with an implicit interface +call sub(xfun) +end From a957cedea9657addbe8b860852cc98306aa437e7 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 17 Dec 2024 12:10:50 -0800 Subject: [PATCH 14/35] [flang] Handle substring in data statement constant (#120130) The case of a constant substring wasn't handled in the parser for data statement constants. Fixes https://github.com/llvm/llvm-project/issues/119005. --- flang/include/flang/Parser/parse-tree.h | 7 ++++--- flang/lib/Parser/Fortran-parsers.cpp | 7 +++++-- flang/lib/Parser/expr-parsers.cpp | 2 +- flang/lib/Parser/type-parsers.h | 1 + flang/test/Parser/lit-substr-data.f90 | 7 +++++++ 5 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 flang/test/Parser/lit-substr-data.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 1d97126d17dbc..f87a1cfceb37b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -1481,9 +1481,10 @@ struct DataStmtConstant { UNION_CLASS_BOILERPLATE(DataStmtConstant); CharBlock source; mutable TypedExpr typedExpr; - std::variant, StructureConstructor> + std::variant, + LiteralConstant, SignedIntLiteralConstant, SignedRealLiteralConstant, + SignedComplexLiteralConstant, NullInit, common::Indirection, + StructureConstructor> u; }; diff --git a/flang/lib/Parser/Fortran-parsers.cpp 
b/flang/lib/Parser/Fortran-parsers.cpp index a3d2c36310807..aa0a2a6db7d58 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -929,8 +929,11 @@ TYPE_PARSER(construct(intLiteralConstant) || // components can be ambiguous with a scalar-constant-subobject. // So we parse literal constants, designator, null-init, and // structure-constructor, so that semantics can figure things out later -// with the symbol table. -TYPE_PARSER(sourced(first(construct(literalConstant), +// with the symbol table. A literal constant substring must be attempted +// first to avoid a partial match with a literal constant. +TYPE_PARSER(sourced(first( + construct(indirect(charLiteralConstantSubstring)), + construct(literalConstant), construct(signedRealLiteralConstant), construct(signedIntLiteralConstant), extension( diff --git a/flang/lib/Parser/expr-parsers.cpp b/flang/lib/Parser/expr-parsers.cpp index 77a13de7fd02d..0b6e21e1ba2ff 100644 --- a/flang/lib/Parser/expr-parsers.cpp +++ b/flang/lib/Parser/expr-parsers.cpp @@ -68,7 +68,7 @@ TYPE_PARSER(construct( // type-param-inquiry is parsed as a structure component, except for // substring%KIND/LEN constexpr auto primary{instrumented("primary"_en_US, - first(construct(indirect(Parser{})), + first(construct(indirect(charLiteralConstantSubstring)), construct(literalConstant), construct(construct("(" >> expr / !","_tok / recovery(")"_tok, SkipPastNested<'(', ')'>{}))), diff --git a/flang/lib/Parser/type-parsers.h b/flang/lib/Parser/type-parsers.h index d7e0cd06c3f44..623437f9d2e1d 100644 --- a/flang/lib/Parser/type-parsers.h +++ b/flang/lib/Parser/type-parsers.h @@ -63,6 +63,7 @@ constexpr Parser kindParam; // R709 constexpr Parser realLiteralConstant; // R714 constexpr Parser charLength; // R723 constexpr Parser charLiteralConstant; // R724 +constexpr Parser charLiteralConstantSubstring; constexpr Parser initialization; // R743 & R805 constexpr Parser derivedTypeSpec; // R754 constexpr Parser 
typeDeclarationStmt; // R801 diff --git a/flang/test/Parser/lit-substr-data.f90 b/flang/test/Parser/lit-substr-data.f90 new file mode 100644 index 0000000000000..7eed616a1ee2e --- /dev/null +++ b/flang/test/Parser/lit-substr-data.f90 @@ -0,0 +1,7 @@ +!RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s +!Regression test for bug #119005 +character*2 :: ary4 +!CHECK: DATA ary4/"cd"/ +data ary4/"abcdef"(3:4)/ +end + From 0b91d77bf4e4ff65ebeed90acd141018d9889e0f Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 17 Dec 2024 12:18:37 -0800 Subject: [PATCH 15/35] [libc] Use __attribute__((__nothrow__)) for __NOEXCEPT in C (#114653) Consistent with glibc headers, where `noexcept` is used in C++ (or `throw()` in older C++ which llvm-libc doesn't support) in the public function declarations, `__attribute__((__nothrow__))` is used in C for compilers that support it. --- libc/include/__llvm-libc-common.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libc/include/__llvm-libc-common.h b/libc/include/__llvm-libc-common.h index e0e29bbbd5f87..d54ee7b9f91f3 100644 --- a/libc/include/__llvm-libc-common.h +++ b/libc/include/__llvm-libc-common.h @@ -53,7 +53,11 @@ #define __restrict restrict // C99 and above support the restrict keyword. 
#undef __NOEXCEPT +#ifdef __GNUC__ +#define __NOEXCEPT __attribute__((__nothrow__)) +#else #define __NOEXCEPT +#endif #endif // __cplusplus From 16ef239520d7bca2002346fe2b4003947ad21ecc Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Mon, 16 Dec 2024 11:31:05 +0100 Subject: [PATCH 16/35] [LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64X (#119294) --- lld/COFF/COFFLinkerContext.h | 21 +++++ lld/COFF/Driver.cpp | 113 +++++++++++++++------------ lld/COFF/InputFiles.cpp | 9 ++- lld/COFF/SymbolTable.h | 6 +- lld/test/COFF/arm64ec-codemap.test | 3 +- lld/test/COFF/arm64ec-entry-thunk.s | 3 +- lld/test/COFF/arm64ec-lib.test | 4 +- lld/test/COFF/arm64ec-range-thunks.s | 9 ++- lld/test/COFF/arm64x-symtab.s | 38 +++++++++ 9 files changed, 143 insertions(+), 63 deletions(-) create mode 100644 lld/test/COFF/arm64x-symtab.s diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h index 5d89e97a7f776..bdd625b8c3916 100644 --- a/lld/COFF/COFFLinkerContext.h +++ b/lld/COFF/COFFLinkerContext.h @@ -32,6 +32,27 @@ class COFFLinkerContext : public CommonLinkerContext { SymbolTable symtab; COFFOptTable optTable; + // A hybrid ARM64EC symbol table on ARM64X target. + std::optional hybridSymtab; + + // Pointer to the ARM64EC symbol table: either symtab for an ARM64EC target or + // hybridSymtab for an ARM64X target. + SymbolTable *symtabEC = nullptr; + + // Returns the appropriate symbol table for the specified machine type. + SymbolTable &getSymtab(llvm::COFF::MachineTypes machine) { + if (hybridSymtab && (machine == ARM64EC || machine == AMD64)) + return *hybridSymtab; + return symtab; + } + + // Invoke the specified callback for each symbol table. 
+ void forEachSymtab(std::function f) { + f(symtab); + if (hybridSymtab) + f(*hybridSymtab); + } + std::vector objFileInstances; std::map pdbInputFileInstances; std::vector importFileInstances; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index b06da846bd3a8..bef55abb7f856 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -595,7 +595,17 @@ void LinkerDriver::setMachine(MachineTypes machine) { assert(machine != IMAGE_FILE_MACHINE_UNKNOWN); ctx.config.machine = machine; - ctx.symtab.machine = machine; + + if (machine != ARM64X) { + ctx.symtab.machine = machine; + if (machine == ARM64EC) + ctx.symtabEC = &ctx.symtab; + } else { + ctx.symtab.machine = ARM64; + ctx.hybridSymtab.emplace(ctx, ARM64EC); + ctx.symtabEC = &*ctx.hybridSymtab; + } + addWinSysRootLibSearchPaths(); } @@ -2518,54 +2528,56 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (config->imageBase == uint64_t(-1)) config->imageBase = getDefaultImageBase(); - ctx.symtab.addSynthetic(mangle("__ImageBase"), nullptr); - if (config->machine == I386) { - ctx.symtab.addAbsolute("___safe_se_handler_table", 0); - ctx.symtab.addAbsolute("___safe_se_handler_count", 0); - } - - ctx.symtab.addAbsolute(mangle("__guard_fids_count"), 0); - ctx.symtab.addAbsolute(mangle("__guard_fids_table"), 0); - ctx.symtab.addAbsolute(mangle("__guard_flags"), 0); - ctx.symtab.addAbsolute(mangle("__guard_iat_count"), 0); - ctx.symtab.addAbsolute(mangle("__guard_iat_table"), 0); - ctx.symtab.addAbsolute(mangle("__guard_longjmp_count"), 0); - ctx.symtab.addAbsolute(mangle("__guard_longjmp_table"), 0); - // Needed for MSVC 2017 15.5 CRT. - ctx.symtab.addAbsolute(mangle("__enclave_config"), 0); - // Needed for MSVC 2019 16.8 CRT. 
- ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0); - ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0); - - if (isArm64EC(config->machine)) { - ctx.symtab.addAbsolute("__arm64x_extra_rfe_table", 0); - ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0); - ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0); - ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0); - ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0); - ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0); - ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0); - ctx.symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0); - ctx.symtab.addAbsolute("__hybrid_code_map", 0); - ctx.symtab.addAbsolute("__hybrid_code_map_count", 0); - ctx.symtab.addAbsolute("__hybrid_image_info_bitfield", 0); - ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0); - ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0); - ctx.symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr); - ctx.symtab.addSynthetic("__arm64x_native_entrypoint", nullptr); - } - - if (config->pseudoRelocs) { - ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); - ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); - } - if (config->mingw) { - ctx.symtab.addAbsolute(mangle("__CTOR_LIST__"), 0); - ctx.symtab.addAbsolute(mangle("__DTOR_LIST__"), 0); - } - if (config->debug || config->buildIDHash != BuildIDHash::None) - if (ctx.symtab.findUnderscore("__buildid")) - ctx.symtab.addUndefined(mangle("__buildid")); + ctx.forEachSymtab([&](SymbolTable &symtab) { + symtab.addSynthetic(mangle("__ImageBase"), nullptr); + if (symtab.machine == I386) { + symtab.addAbsolute("___safe_se_handler_table", 0); + symtab.addAbsolute("___safe_se_handler_count", 0); + } + + symtab.addAbsolute(mangle("__guard_fids_count"), 0); + symtab.addAbsolute(mangle("__guard_fids_table"), 0); + symtab.addAbsolute(mangle("__guard_flags"), 0); + 
symtab.addAbsolute(mangle("__guard_iat_count"), 0); + symtab.addAbsolute(mangle("__guard_iat_table"), 0); + symtab.addAbsolute(mangle("__guard_longjmp_count"), 0); + symtab.addAbsolute(mangle("__guard_longjmp_table"), 0); + // Needed for MSVC 2017 15.5 CRT. + symtab.addAbsolute(mangle("__enclave_config"), 0); + // Needed for MSVC 2019 16.8 CRT. + symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0); + symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0); + + if (isArm64EC(ctx.config.machine)) { + symtab.addAbsolute("__arm64x_extra_rfe_table", 0); + symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0); + symtab.addAbsolute("__arm64x_redirection_metadata", 0); + symtab.addAbsolute("__arm64x_redirection_metadata_count", 0); + symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0); + symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0); + symtab.addAbsolute("__hybrid_auxiliary_iat", 0); + symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0); + symtab.addAbsolute("__hybrid_code_map", 0); + symtab.addAbsolute("__hybrid_code_map_count", 0); + symtab.addAbsolute("__hybrid_image_info_bitfield", 0); + symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0); + symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0); + symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr); + symtab.addSynthetic("__arm64x_native_entrypoint", nullptr); + } + + if (config->pseudoRelocs) { + symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); + symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); + } + if (config->mingw) { + symtab.addAbsolute(mangle("__CTOR_LIST__"), 0); + symtab.addAbsolute(mangle("__DTOR_LIST__"), 0); + } + if (config->debug || config->buildIDHash != BuildIDHash::None) + if (symtab.findUnderscore("__buildid")) + symtab.addUndefined(mangle("__buildid")); + }); // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we 
@@ -2808,7 +2820,8 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (auto *arg = args.getLastArg(OPT_print_symbol_order)) config->printSymbolOrder = arg->getValue(); - ctx.symtab.initializeECThunks(); + if (ctx.symtabEC) + ctx.symtabEC->initializeECThunks(); // Identify unreferenced COMDAT sections. if (config->doGC) { diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 6ce075be798e4..3de2cdb0e53ee 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -114,15 +114,15 @@ void ArchiveFile::parse() { file = CHECK(Archive::create(mb), this); // Try to read symbols from ECSYMBOLS section on ARM64EC. - if (isArm64EC(ctx.config.machine)) { + if (ctx.symtabEC) { iterator_range symbols = CHECK(file->ec_symbols(), this); if (!symbols.empty()) { for (const Archive::Symbol &sym : symbols) - ctx.symtab.addLazyArchive(this, sym); + ctx.symtabEC->addLazyArchive(this, sym); // Read both EC and native symbols on ARM64X. - if (ctx.config.machine != ARM64X) + if (!ctx.hybridSymtab) return; } } @@ -177,7 +177,8 @@ ObjFile *ObjFile::create(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy) { Fatal(ctx) << m.getBufferIdentifier() << " is not a COFF file"; bin->release(); - return make(ctx.symtab, obj, lazy); + return make(ctx.getSymtab(MachineTypes(obj->getMachine())), obj, + lazy); } void ObjFile::parseLazy() { diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 40dbb99ae9cdd..b694893b903aa 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -47,7 +47,9 @@ class Symbol; // There is one add* function per symbol type. 
class SymbolTable { public: - SymbolTable(COFFLinkerContext &c) : ctx(c) {} + SymbolTable(COFFLinkerContext &c, + llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN) + : ctx(c), machine(machine) {} void addFile(InputFile *file); @@ -120,7 +122,7 @@ class SymbolTable { uint32_t newSectionOffset = 0); COFFLinkerContext &ctx; - llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; + llvm::COFF::MachineTypes machine; bool isEC() const { return machine == ARM64EC; } diff --git a/lld/test/COFF/arm64ec-codemap.test b/lld/test/COFF/arm64ec-codemap.test index 6e97c14e87d86..2d79538f0a7eb 100644 --- a/lld/test/COFF/arm64ec-codemap.test +++ b/lld/test/COFF/arm64ec-codemap.test @@ -9,6 +9,7 @@ RUN: llvm-mc -filetype=obj -triple=arm64ec-windows data-sec2.s -o data-sec2.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows empty-sec.s -o arm64ec-empty-sec.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-func-sym.s -o x86_64-func-sym.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows empty-sec.s -o x86_64-empty-sec.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj Link ARM64EC DLL and verify that the code is arranged as expected. @@ -51,7 +52,7 @@ RUN: llvm-readobj --coff-load-config test2.dll | FileCheck -check-prefix=CODEMAP RUN: llvm-objdump -d test2.dll | FileCheck -check-prefix=DISASM %s RUN: lld-link -out:testx.dll -machine:arm64x arm64-func-sym.obj arm64ec-func-sym.obj \ -RUN: x86_64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry +RUN: x86_64-func-sym.obj loadconfig-arm64.obj loadconfig-arm64ec.obj -dll -noentry Adding empty chunks does not affect code map ranges. 
diff --git a/lld/test/COFF/arm64ec-entry-thunk.s b/lld/test/COFF/arm64ec-entry-thunk.s index 164e6cc439648..bf5cb42755b62 100644 --- a/lld/test/COFF/arm64ec-entry-thunk.s +++ b/lld/test/COFF/arm64ec-entry-thunk.s @@ -27,6 +27,7 @@ thunk: .rva func // RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadcfg.obj +// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o native-loadcfg.obj // RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-simple.s -o test-simple.obj // RUN: lld-link -machine:arm64ec -dll -noentry -out:out-simple.dll loadcfg.obj test-simple.obj // RUN: llvm-objdump -d out-simple.dll | FileCheck --check-prefix=DISASM %s @@ -43,7 +44,7 @@ thunk: // RUN: llvm-readobj --sections out-simple.dll | FileCheck --check-prefix=HYBMP %s // HYBMP-NOT: .hybmp -// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll loadcfg.obj test-simple.obj +// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll native-loadcfg.obj loadcfg.obj test-simple.obj // RUN: llvm-objdump -d out-simplex.dll | FileCheck --check-prefix=DISASM %s #--- test-split-func.s diff --git a/lld/test/COFF/arm64ec-lib.test b/lld/test/COFF/arm64ec-lib.test index 9dff23022f9ee..ea07d28f1a411 100644 --- a/lld/test/COFF/arm64ec-lib.test +++ b/lld/test/COFF/arm64ec-lib.test @@ -11,6 +11,7 @@ RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-alias.s -o ref-alias.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-thunk.s -o ref-thunk.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func.s -o func.obj RUN: llvm-mc -filetype=obj -triple=x86_64-windows func-x86_64.s -o func-x86_64.obj +RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64.obj @@ -26,7 +27,8 @@ 
Verify that a symbol can be referenced from a regular archive map when ECSYMBOLS RUN: lld-link -machine:arm64ec -dll -noentry -out:test2.dll symref-arm64ec.obj sym-x86_64.lib loadconfig-arm64ec.obj Verify that both native and EC symbols can be referenced in a hybrid target. -RUN: lld-link -machine:arm64x -dll -noentry -out:test3.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib loadconfig-arm64ec.obj +RUN: lld-link -machine:arm64x -dll -noentry -out:test3.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib \ +RUN: loadconfig-arm64.obj loadconfig-arm64ec.obj Ensure that an EC symbol is not resolved using a regular symbol map. RUN: not lld-link -machine:arm64ec -dll -noentry -out:test-err.dll nsymref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj 2>&1 |\ diff --git a/lld/test/COFF/arm64ec-range-thunks.s b/lld/test/COFF/arm64ec-range-thunks.s index f170349d7965d..09d9b013f97a5 100644 --- a/lld/test/COFF/arm64ec-range-thunks.s +++ b/lld/test/COFF/arm64ec-range-thunks.s @@ -5,6 +5,7 @@ # RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj # RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj # RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj +# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj # RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj @@ -59,8 +60,8 @@ # A similar test using a hybrid binary and native placeholder chunks. 
-# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \ -# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s +# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64.obj loadconfig-arm64ec.obj \ +# RUN: -out:testx.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s # RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s # RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s @@ -74,8 +75,8 @@ # Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations. -# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \ -# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s +# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64.obj loadconfig-arm64ec.obj \ +# RUN: -out:testx2.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s # VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes # RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s diff --git a/lld/test/COFF/arm64x-symtab.s b/lld/test/COFF/arm64x-symtab.s new file mode 100644 index 0000000000000..e7beb0287d7ff --- /dev/null +++ b/lld/test/COFF/arm64x-symtab.s @@ -0,0 +1,38 @@ +// REQUIRES: aarch64, x86 +// RUN: split-file %s %t.dir && cd %t.dir + +// RUN: llvm-mc -filetype=obj -triple=aarch64-windows sym.s -o sym-aarch64.obj +// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows sym.s -o sym-arm64ec.obj +// RUN: llvm-mc -filetype=obj -triple=x86_64-windows sym.s -o sym-x86_64.obj +// RUN: llvm-mc -filetype=obj -triple=aarch64-windows symref.s -o symref-aarch64.obj +// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows symref.s -o symref-arm64ec.obj +// RUN: llvm-lib -machine:arm64x -out:sym.lib sym-aarch64.obj sym-arm64ec.obj + +// Check that native object files can't reference EC 
symbols. + +// RUN: not lld-link -machine:arm64x -dll -noentry -out:err1.dll symref-aarch64.obj sym-arm64ec.obj \ +// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s +// UNDEF: lld-link: error: undefined symbol: sym +// UNDEF-NEXT: >>> referenced by symref-aarch64.obj:(.data) + +// RUN: not lld-link -machine:arm64x -dll -noentry -out:err2.dll symref-aarch64.obj sym-x86_64.obj \ +// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s + +// Check that ARM64X target can have the same symbol names in both native and EC namespaces. + +// RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll symref-aarch64.obj sym-aarch64.obj \ +// RUN: symref-arm64ec.obj sym-x86_64.obj + +// Check that ARM64X target can reference both native and EC symbols from an archive. + +// RUN: lld-link -machine:arm64x -dll -noentry -out:out2.dll symref-aarch64.obj symref-arm64ec.obj sym.lib + +#--- symref.s + .data + .rva sym + +#--- sym.s + .data + .globl sym +sym: + .word 0 From e8ce6c4e69745b1b2cd6f7479c48fbae44622cb3 Mon Sep 17 00:00:00 2001 From: Philipp van Kempen Date: Tue, 17 Dec 2024 21:20:17 +0100 Subject: [PATCH 17/35] [RISCV] Fix typo in CV_SH_rr_inc pattern (#120246) This typo in https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td#L701:L701 caused a compiler crash in 'RISC-V Assembly Printer' because CV_SH_ri_inc was selected, leading to `getImmOpValue` being called for a register operand. This bug did not affect the Assembler output and therefore does not trigger any existing unit tests, but is visible by examining the final MIR function. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index 4478e24611108..b98934d8c6396 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -698,7 +698,7 @@ let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in { def : CVStriPat; def : CVStrriPat; - def : CVStrriPat; + def : CVStrriPat; def : CVStrriPat; def : CVStrrPat; From c98e79d85663d6a5a5fcd6475b662a750fcf4b8e Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Wed, 18 Dec 2024 02:04:50 +0530 Subject: [PATCH 18/35] [libc][complex] Implement different flavors of the `cproj` function (#119722) Refer section 7.3.9.5 of ISO/IEC 9899:2023 --- libc/config/linux/aarch64/entrypoints.txt | 5 + libc/config/linux/arm/entrypoints.txt | 3 + libc/config/linux/riscv/entrypoints.txt | 4 + libc/config/linux/x86_64/entrypoints.txt | 5 + libc/docs/headers/complex.rst | 2 +- libc/hdrgen/yaml/complex.yaml | 32 ++++++ libc/src/__support/CMakeLists.txt | 5 + libc/src/__support/complex_type.h | 15 +++ libc/src/complex/CMakeLists.txt | 6 + libc/src/complex/conjf128.h | 1 - libc/src/complex/conjf16.h | 1 - libc/src/complex/cproj.h | 20 ++++ libc/src/complex/cprojf.h | 20 ++++ libc/src/complex/cprojf128.h | 26 +++++ libc/src/complex/cprojf16.h | 26 +++++ libc/src/complex/cprojl.h | 20 ++++ libc/src/complex/generic/CMakeLists.txt | 69 +++++++++++- libc/src/complex/generic/conj.cpp | 1 - libc/src/complex/generic/conjf.cpp | 1 - libc/src/complex/generic/conjf128.cpp | 1 - libc/src/complex/generic/conjf16.cpp | 1 - libc/src/complex/generic/conjl.cpp | 1 - libc/src/complex/generic/cproj.cpp | 19 ++++ libc/src/complex/generic/cprojf.cpp | 19 ++++ libc/src/complex/generic/cprojf128.cpp | 23 ++++ libc/src/complex/generic/cprojf16.cpp | 23 ++++ libc/src/complex/generic/cprojl.cpp | 19 ++++ libc/test/src/complex/CMakeLists.txt | 60 
++++++++++ libc/test/src/complex/CprojTest.h | 131 ++++++++++++++++++++++ libc/test/src/complex/cproj_test.cpp | 13 +++ libc/test/src/complex/cprojf128_test.cpp | 17 +++ libc/test/src/complex/cprojf16_test.cpp | 17 +++ libc/test/src/complex/cprojf_test.cpp | 13 +++ libc/test/src/complex/cprojl_test.cpp | 13 +++ 34 files changed, 619 insertions(+), 13 deletions(-) create mode 100644 libc/src/complex/cproj.h create mode 100644 libc/src/complex/cprojf.h create mode 100644 libc/src/complex/cprojf128.h create mode 100644 libc/src/complex/cprojf16.h create mode 100644 libc/src/complex/cprojl.h create mode 100644 libc/src/complex/generic/cproj.cpp create mode 100644 libc/src/complex/generic/cprojf.cpp create mode 100644 libc/src/complex/generic/cprojf128.cpp create mode 100644 libc/src/complex/generic/cprojf16.cpp create mode 100644 libc/src/complex/generic/cprojl.cpp create mode 100644 libc/test/src/complex/CprojTest.h create mode 100644 libc/test/src/complex/cproj_test.cpp create mode 100644 libc/test/src/complex/cprojf128_test.cpp create mode 100644 libc/test/src/complex/cprojf16_test.cpp create mode 100644 libc/test/src/complex/cprojf_test.cpp create mode 100644 libc/test/src/complex/cprojl_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index c2179938b40d1..b949e4b4f67ba 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -370,6 +370,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.complex.conj libc.src.complex.conjf libc.src.complex.conjl + libc.src.complex.cproj + libc.src.complex.cprojf + libc.src.complex.cprojl # fenv.h entrypoints libc.src.fenv.feclearexcept @@ -622,6 +625,7 @@ if(LIBC_TYPES_HAS_FLOAT16) # libc.src.complex.crealf16 # libc.src.complex.cimagf16 # libc.src.complex.conjf16 + # libc.src.complex.cprojf16 # math.h C23 _Float16 entrypoints libc.src.math.canonicalizef16 @@ -728,6 +732,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.complex.crealf128 
libc.src.complex.cimagf128 libc.src.complex.conjf128 + libc.src.complex.cprojf128 # math.h C23 _Float128 entrypoints libc.src.math.canonicalizef128 diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index f5e9827727396..83f4dfaaa2d0f 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -212,6 +212,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.complex.conj libc.src.complex.conjf libc.src.complex.conjl + libc.src.complex.cproj + libc.src.complex.cprojf + libc.src.complex.cprojl # fenv.h entrypoints libc.src.fenv.feclearexcept diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index e2df6aca38bf9..19980f79e7be8 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -367,6 +367,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.complex.conj libc.src.complex.conjf libc.src.complex.conjl + libc.src.complex.cproj + libc.src.complex.cprojf + libc.src.complex.cprojl # fenv.h entrypoints libc.src.fenv.feclearexcept @@ -623,6 +626,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.complex.crealf128 libc.src.complex.cimagf128 libc.src.complex.conjf128 + libc.src.complex.cprojf128 # math.h C23 _Float128 entrypoints libc.src.math.canonicalizef128 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index e236ad62261d8..08d8559d8c81a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -370,6 +370,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.complex.conj libc.src.complex.conjf libc.src.complex.conjl + libc.src.complex.cproj + libc.src.complex.cprojf + libc.src.complex.cprojl # fenv.h entrypoints libc.src.fenv.feclearexcept @@ -627,6 +630,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.complex.crealf16 libc.src.complex.cimagf16 libc.src.complex.conjf16 + libc.src.complex.cprojf16 # math.h C23 _Float16 entrypoints libc.src.math.canonicalizef16 @@ 
-738,6 +742,7 @@ if(LIBC_TYPES_HAS_FLOAT128) # libc.src.complex.crealf128 # libc.src.complex.cimagf128 # libc.src.complex.conjf128 + # libc.src.complex.cprojf128 # math.h C23 _Float128 entrypoints libc.src.math.canonicalizef128 diff --git a/libc/docs/headers/complex.rst b/libc/docs/headers/complex.rst index b6a340543fad1..272cf00c883bc 100644 --- a/libc/docs/headers/complex.rst +++ b/libc/docs/headers/complex.rst @@ -59,7 +59,7 @@ Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | conj | |check| | |check| | |check| | |check| | |check| | 7.3.9.4 | N/A | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| cproj | | | | | | 7.3.9.5 | N/A | +| cproj | |check| | |check| | |check| | |check| | |check| | 7.3.9.5 | N/A | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | creal | |check| | |check| | |check| | |check| | |check| | 7.3.9.6 | N/A | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/hdrgen/yaml/complex.yaml b/libc/hdrgen/yaml/complex.yaml index be0d3c9ae59b4..cd81de7dd9e20 100644 --- a/libc/hdrgen/yaml/complex.yaml +++ b/libc/hdrgen/yaml/complex.yaml @@ -103,3 +103,35 @@ functions: arguments: - type: cfloat128 guard: LIBC_TYPES_HAS_CFLOAT128 + - name: cproj + standards: + - stdc + return_type: _Complex double + arguments: + - type: _Complex double + - name: cprojf + standards: + - stdc + return_type: _Complex float + arguments: + - type: _Complex float + - name: cprojl + standards: + - stdc + return_type: _Complex long double + 
arguments: + - type: _Complex long double + - name: cprojf16 + standards: + - stdc + return_type: cfloat16 + arguments: + - type: cfloat16 + guard: LIBC_TYPES_HAS_CFLOAT16 + - name: cprojf128 + standards: + - stdc + return_type: cfloat128 + arguments: + - type: cfloat128 + guard: LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 70ed67c156d1a..4e90aad9a45b4 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -241,6 +241,11 @@ add_header_library( complex_type HDRS complex_type.h + DEPENDS + libc.src.__support.CPP.bit + libc.src.__support.FPUtil.fp_bits + libc.src.__support.macros.properties.types + libc.src.__support.macros.properties.complex_types ) add_header_library( diff --git a/libc/src/__support/complex_type.h b/libc/src/__support/complex_type.h index 5dadfd20a0051..f72ce8a4efd13 100644 --- a/libc/src/__support/complex_type.h +++ b/libc/src/__support/complex_type.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_COMPLEX_TYPE_H #define LLVM_LIBC_SRC___SUPPORT_COMPLEX_TYPE_H +#include "src/__support/CPP/bit.h" +#include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/complex_types.h" #include "src/__support/macros/properties/types.h" @@ -75,5 +77,24 @@ template LIBC_INLINE constexpr T conjugate(T c) { return cpp::bit_cast(c_c); } +// Projects c onto the Riemann sphere, as C's cproj does: if either part of c +// is infinite the result is +inf + i * copysign(0, imag(c)); otherwise c is +// returned unchanged. +template <typename T> LIBC_INLINE constexpr T project(T c) { + using real_t = make_real_t<T>; + Complex<real_t> c_c = cpp::bit_cast<Complex<real_t>>(c); + if (fputil::FPBits<real_t>(c_c.real).is_inf() || + fputil::FPBits<real_t>(c_c.imag).is_inf()) { + // Take the sign of the zero from the sign bit of imag; comparing with + // `imag > 0` would give -0.0 for a +0.0 or NaN imaginary part. + return cpp::bit_cast<T>( + Complex<real_t>{(fputil::FPBits<real_t>::inf(Sign::POS).get_val()), + static_cast<real_t>( + fputil::FPBits<real_t>(c_c.imag).is_neg() ? 
-0.0 : 0.0)}); + } else { + return c; + } +} + } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC___SUPPORT_COMPLEX_TYPE_H diff --git a/libc/src/complex/CMakeLists.txt b/libc/src/complex/CMakeLists.txt index 7306e2fe925e3..bc66a5445d727 100644 --- a/libc/src/complex/CMakeLists.txt +++ b/libc/src/complex/CMakeLists.txt @@ -30,3 +30,9 @@ add_complex_entrypoint_object(conjf) add_complex_entrypoint_object(conjl) add_complex_entrypoint_object(conjf16) add_complex_entrypoint_object(conjf128) + +add_complex_entrypoint_object(cproj) +add_complex_entrypoint_object(cprojf) +add_complex_entrypoint_object(cprojl) +add_complex_entrypoint_object(cprojf16) +add_complex_entrypoint_object(cprojf128) diff --git a/libc/src/complex/conjf128.h b/libc/src/complex/conjf128.h index 587c979d315ef..c1ae0b03d067a 100644 --- a/libc/src/complex/conjf128.h +++ b/libc/src/complex/conjf128.h @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" #if defined(LIBC_TYPES_HAS_CFLOAT128) diff --git a/libc/src/complex/conjf16.h b/libc/src/complex/conjf16.h index b15c5b3f61f4a..685ac8ac5c858 100644 --- a/libc/src/complex/conjf16.h +++ b/libc/src/complex/conjf16.h @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" #if defined(LIBC_TYPES_HAS_CFLOAT16) diff --git a/libc/src/complex/cproj.h b/libc/src/complex/cproj.h new file mode 100644 index 0000000000000..62d41bceec3e3 --- /dev/null +++ b/libc/src/complex/cproj.h @@ -0,0 +1,20 @@ +//===-- Implementation header for cproj -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_COMPLEX_CPROJ_H +#define LLVM_LIBC_SRC_COMPLEX_CPROJ_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +_Complex double cproj(_Complex double x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CPROJ_H diff --git a/libc/src/complex/cprojf.h b/libc/src/complex/cprojf.h new file mode 100644 index 0000000000000..76124f9117776 --- /dev/null +++ b/libc/src/complex/cprojf.h @@ -0,0 +1,20 @@ +//===-- Implementation header for cprojf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_COMPLEX_CPROJF_H +#define LLVM_LIBC_SRC_COMPLEX_CPROJF_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +_Complex float cprojf(_Complex float x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CPROJF_H diff --git a/libc/src/complex/cprojf128.h b/libc/src/complex/cprojf128.h new file mode 100644 index 0000000000000..5f7fe992ef30b --- /dev/null +++ b/libc/src/complex/cprojf128.h @@ -0,0 +1,26 @@ +//===-- Implementation header for cprojf128 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/macros/properties/complex_types.h" + +#if defined(LIBC_TYPES_HAS_CFLOAT128) + +#ifndef LLVM_LIBC_SRC_COMPLEX_CPROJF128_H +#define LLVM_LIBC_SRC_COMPLEX_CPROJF128_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +cfloat128 cprojf128(cfloat128 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CPROJF128_H + +#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/cprojf16.h b/libc/src/complex/cprojf16.h new file mode 100644 index 0000000000000..8cce5f0bcef2b --- /dev/null +++ b/libc/src/complex/cprojf16.h @@ -0,0 +1,26 @@ +//===-- Implementation header for cprojf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/macros/properties/complex_types.h" + +#if defined(LIBC_TYPES_HAS_CFLOAT16) + +#ifndef LLVM_LIBC_SRC_COMPLEX_CPROJF16_H +#define LLVM_LIBC_SRC_COMPLEX_CPROJF16_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +cfloat16 cprojf16(cfloat16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CPROJF16_H + +#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/cprojl.h b/libc/src/complex/cprojl.h new file mode 100644 index 0000000000000..ecc8dce8f8535 --- /dev/null +++ b/libc/src/complex/cprojl.h @@ -0,0 +1,20 @@ +//===-- Implementation header for cprojl ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_COMPLEX_CPROJL_H +#define LLVM_LIBC_SRC_COMPLEX_CPROJL_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +_Complex long double cprojl(_Complex long double x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CPROJL_H diff --git a/libc/src/complex/generic/CMakeLists.txt b/libc/src/complex/generic/CMakeLists.txt index cc14f89122edd..3dae6f8a61495 100644 --- a/libc/src/complex/generic/CMakeLists.txt +++ b/libc/src/complex/generic/CMakeLists.txt @@ -1,3 +1,67 @@ +add_entrypoint_object( + cproj + SRCS + cproj.cpp + HDRS + ../cproj.h + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + libc.src.__support.complex_type +) + +add_entrypoint_object( + cprojf + SRCS + cprojf.cpp + HDRS + ../cprojf.h + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + libc.src.__support.complex_type +) + +add_entrypoint_object( + cprojl + SRCS + cprojl.cpp + HDRS + ../cprojl.h + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + libc.src.__support.complex_type +) + +add_entrypoint_object( + cprojf16 + SRCS + cprojf16.cpp + HDRS + ../cprojf16.h + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + libc.src.__support.complex_type + libc.src.__support.macros.properties.types + libc.src.__support.macros.properties.complex_types +) + +add_entrypoint_object( + cprojf128 + SRCS + cprojf128.cpp + HDRS + ../cprojf128.h + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + libc.src.__support.complex_type + libc.src.__support.macros.properties.types + libc.src.__support.macros.properties.complex_types +) + add_entrypoint_object( conj SRCS @@ -7,7 +71,6 @@ add_entrypoint_object( COMPILE_OPTIONS ${libc_opt_high_flag} DEPENDS - libc.src.__support.CPP.bit libc.src.__support.complex_type ) @@ -20,7 +83,6 @@ add_entrypoint_object( COMPILE_OPTIONS ${libc_opt_high_flag} DEPENDS - 
libc.src.__support.CPP.bit libc.src.__support.complex_type ) @@ -33,7 +95,6 @@ add_entrypoint_object( COMPILE_OPTIONS ${libc_opt_high_flag} DEPENDS - libc.src.__support.CPP.bit libc.src.__support.complex_type ) @@ -46,7 +107,6 @@ add_entrypoint_object( COMPILE_OPTIONS ${libc_opt_high_flag} DEPENDS - libc.src.__support.CPP.bit libc.src.__support.complex_type libc.src.__support.macros.properties.types libc.src.__support.macros.properties.complex_types @@ -61,7 +121,6 @@ add_entrypoint_object( COMPILE_OPTIONS ${libc_opt_high_flag} DEPENDS - libc.src.__support.CPP.bit libc.src.__support.complex_type libc.src.__support.macros.properties.types libc.src.__support.macros.properties.complex_types diff --git a/libc/src/complex/generic/conj.cpp b/libc/src/complex/generic/conj.cpp index 1a93bc25dc3c4..cbcd480d6efa5 100644 --- a/libc/src/complex/generic/conj.cpp +++ b/libc/src/complex/generic/conj.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/conj.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" diff --git a/libc/src/complex/generic/conjf.cpp b/libc/src/complex/generic/conjf.cpp index 33cb34340a04e..a1af3d78ebc6a 100644 --- a/libc/src/complex/generic/conjf.cpp +++ b/libc/src/complex/generic/conjf.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/conjf.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" diff --git a/libc/src/complex/generic/conjf128.cpp b/libc/src/complex/generic/conjf128.cpp index 4e35b3d5a97b7..c65b54849f52e 100644 --- a/libc/src/complex/generic/conjf128.cpp +++ b/libc/src/complex/generic/conjf128.cpp @@ -9,7 +9,6 @@ #include "src/complex/conjf128.h" #if defined(LIBC_TYPES_HAS_CFLOAT128) -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include 
"src/__support/complex_type.h" diff --git a/libc/src/complex/generic/conjf16.cpp b/libc/src/complex/generic/conjf16.cpp index 2564fe252027a..dac11e27b30a2 100644 --- a/libc/src/complex/generic/conjf16.cpp +++ b/libc/src/complex/generic/conjf16.cpp @@ -9,7 +9,6 @@ #include "src/complex/conjf16.h" #if defined(LIBC_TYPES_HAS_CFLOAT16) -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" diff --git a/libc/src/complex/generic/conjl.cpp b/libc/src/complex/generic/conjl.cpp index dc071ab1ec51b..8298ede6fa38f 100644 --- a/libc/src/complex/generic/conjl.cpp +++ b/libc/src/complex/generic/conjl.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/conjl.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" diff --git a/libc/src/complex/generic/cproj.cpp b/libc/src/complex/generic/cproj.cpp new file mode 100644 index 0000000000000..d5e8c3ff3d9ec --- /dev/null +++ b/libc/src/complex/generic/cproj.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of cproj function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/complex/cproj.h" +#include "src/__support/common.h" +#include "src/__support/complex_type.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(_Complex double, cproj, (_Complex double x)) { + return project<_Complex double>(x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/complex/generic/cprojf.cpp b/libc/src/complex/generic/cprojf.cpp new file mode 100644 index 0000000000000..d0235f6bfef7e --- /dev/null +++ b/libc/src/complex/generic/cprojf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of cprojf function ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/complex/cprojf.h" +#include "src/__support/common.h" +#include "src/__support/complex_type.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(_Complex float, cprojf, (_Complex float x)) { + return project<_Complex float>(x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/complex/generic/cprojf128.cpp b/libc/src/complex/generic/cprojf128.cpp new file mode 100644 index 0000000000000..97134b5523a56 --- /dev/null +++ b/libc/src/complex/generic/cprojf128.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of cprojf128 function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/complex/cprojf128.h" +#if defined(LIBC_TYPES_HAS_CFLOAT128) + +#include "src/__support/common.h" +#include "src/__support/complex_type.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(cfloat128, cprojf128, (cfloat128 x)) { + return project(x); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/generic/cprojf16.cpp b/libc/src/complex/generic/cprojf16.cpp new file mode 100644 index 0000000000000..bd0425ffb5fe5 --- /dev/null +++ b/libc/src/complex/generic/cprojf16.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of cprojf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/complex/cprojf16.h" +#if defined(LIBC_TYPES_HAS_CFLOAT16) + +#include "src/__support/common.h" +#include "src/__support/complex_type.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(cfloat16, cprojf16, (cfloat16 x)) { + return project(x); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/generic/cprojl.cpp b/libc/src/complex/generic/cprojl.cpp new file mode 100644 index 0000000000000..34deeb63b16d0 --- /dev/null +++ b/libc/src/complex/generic/cprojl.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of cprojl function ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/complex/cprojl.h" +#include "src/__support/common.h" +#include "src/__support/complex_type.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(_Complex long double, cprojl, (_Complex long double x)) { + return project<_Complex long double>(x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/complex/CMakeLists.txt b/libc/test/src/complex/CMakeLists.txt index 0c668d9e1e8b9..d6b62e4686a22 100644 --- a/libc/test/src/complex/CMakeLists.txt +++ b/libc/test/src/complex/CMakeLists.txt @@ -60,6 +60,66 @@ add_libc_test( LibcFPTestHelpers ) +add_libc_test( + cproj_test + SUITE + libc-complex-unittests + SRCS + cproj_test.cpp + DEPENDS + libc.src.complex.cproj + LINK_LIBRARIES + LibcFPTestHelpers +) + +add_libc_test( + cprojf_test + SUITE + libc-complex-unittests + SRCS + cprojf_test.cpp + DEPENDS + libc.src.complex.cprojf + LINK_LIBRARIES + LibcFPTestHelpers +) + +add_libc_test( + cprojl_test + SUITE + libc-complex-unittests + SRCS + cprojl_test.cpp + DEPENDS + libc.src.complex.cprojl + LINK_LIBRARIES + LibcFPTestHelpers +) + +add_libc_test( + cprojf16_test + SUITE + libc-complex-unittests + SRCS + cprojf16_test.cpp + DEPENDS + libc.src.complex.cprojf16 + LINK_LIBRARIES + LibcFPTestHelpers +) + +add_libc_test( + cprojf128_test + SUITE + libc-complex-unittests + SRCS + cprojf128_test.cpp + DEPENDS + libc.src.complex.cprojf128 + LINK_LIBRARIES + LibcFPTestHelpers +) + add_libc_test( creal_test SUITE diff --git a/libc/test/src/complex/CprojTest.h b/libc/test/src/complex/CprojTest.h new file mode 100644 index 0000000000000..4e2f6cc58a5ae --- /dev/null +++ b/libc/test/src/complex/CprojTest.h @@ -0,0 +1,131 @@ +//===-- Utility class to test different flavors of cproj --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_COMPLEX_CPROJTEST_H +#define LLVM_LIBC_TEST_SRC_COMPLEX_CPROJTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "hdr/math_macros.h" + +template +class CprojTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(FPT) + +public: + typedef CFPT (*CprojFunc)(CFPT); + + void testSpecialNumbers(CprojFunc func) { + EXPECT_CFP_EQ(func(CFPT(inf + 9024.2442i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(inf - 9024.2442i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(neg_inf + 8923.124i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(neg_inf - 8923.124i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(9024.2442 + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(9024.2442 + neg_inf * 1.0i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(inf + neg_inf * 1.0i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(inf + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(neg_inf + neg_inf * 1.0i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(neg_inf + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(neg_inf + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(aNaN + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(aNaN + neg_inf * 1.0i)), CFPT(inf - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(90.24 + inf * 1.0i)), CFPT(inf + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(89.12 + neg_inf * 1.0i)), CFPT(inf - 0.0i)); + + EXPECT_CFP_EQ(func(CFPT(aNaN + 67.123i)), CFPT(aNaN + 67.123i)); + EXPECT_CFP_EQ(func(CFPT(neg_aNaN + 78.319i)), CFPT(neg_aNaN + 78.319i)); + EXPECT_CFP_EQ(func(CFPT(sNaN + 7813.131i)), CFPT(sNaN + 7813.131i)); + EXPECT_CFP_EQ(func(CFPT(neg_sNaN + 7824.152i)), CFPT(neg_sNaN + 7824.152i)); + 
EXPECT_CFP_EQ(func(CFPT(min_normal + 782.124i)), + CFPT(min_normal + 782.124i)); + EXPECT_CFP_EQ(func(CFPT(max_normal + 2141.2352i)), + CFPT(max_normal + 2141.2352i)); + EXPECT_CFP_EQ(func(CFPT(neg_max_normal + 341.134i)), + CFPT(neg_max_normal + 341.134i)); + EXPECT_CFP_EQ(func(CFPT(min_denormal + 781.142i)), + CFPT(min_denormal + 781.142i)); + EXPECT_CFP_EQ(func(CFPT(neg_min_denormal + 781.134i)), + CFPT(neg_min_denormal + 781.134i)); + EXPECT_CFP_EQ(func(CFPT(max_denormal + 1241.112i)), + CFPT(max_denormal + 1241.112i)); + EXPECT_CFP_EQ(func(CFPT(zero + 121.121i)), CFPT(zero + 121.121i)); + EXPECT_CFP_EQ(func(CFPT(67.123 + aNaN * 1.0i)), CFPT(67.123 + aNaN * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(78.319 + neg_aNaN * 1.0i)), + CFPT(78.319 + neg_aNaN * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(7813.131 + sNaN * 1.0i)), + CFPT(7813.131 + sNaN * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(7824.152 + neg_sNaN * 1.0i)), + CFPT(7824.152 + neg_sNaN * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(782.124 + min_normal * 1.0i)), + CFPT(782.124 + min_normal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(2141.2352 + max_normal * 1.0i)), + CFPT(2141.2352 + max_normal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(341.134 + neg_max_normal * 1.0i)), + CFPT(341.134 + neg_max_normal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(781.142 + min_denormal * 1.0i)), + CFPT(781.142 + min_denormal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(781.134 + neg_min_denormal * 1.0i)), + CFPT(781.134 + neg_min_denormal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(1241.112 + max_denormal * 1.0i)), + CFPT(1241.112 + max_denormal * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(121.121 + zero * 1.0i)), + CFPT(121.121 + zero * 1.0i)); + EXPECT_CFP_EQ(func(CFPT(0.0 - 0.0i)), CFPT(0.0 - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(0.0 + 0.0i)), CFPT(0.0 + 0.0i)); + EXPECT_CFP_EQ(func(CFPT(-0.0 - 0.0i)), CFPT(-0.0 - 0.0i)); + EXPECT_CFP_EQ(func(CFPT(-0.0 + 0.0i)), CFPT(-0.0 + 0.0i)); + } + + void testRoundedNumbers(CprojFunc func) { + EXPECT_CFP_EQ(func((CFPT)(4523.1413 + 12413.1414i)), + CFPT(4523.1413 + 
12413.1414i)); + EXPECT_CFP_EQ(func((CFPT)(-4523.1413 + 12413.1414i)), + CFPT(-4523.1413 + 12413.1414i)); + EXPECT_CFP_EQ(func((CFPT)(4523.1413 - 12413.1414i)), + CFPT(4523.1413 - 12413.1414i)); + EXPECT_CFP_EQ(func((CFPT)(-4523.1413 - 12413.1414i)), + CFPT(-4523.1413 - 12413.1414i)); + + EXPECT_CFP_EQ(func((CFPT)(3210.5678 + 9876.5432i)), + CFPT(3210.5678 + 9876.5432i)); + EXPECT_CFP_EQ(func((CFPT)(-3210.5678 + 9876.5432i)), + CFPT(-3210.5678 + 9876.5432i)); + EXPECT_CFP_EQ(func((CFPT)(3210.5678 - 9876.5432i)), + CFPT(3210.5678 - 9876.5432i)); + EXPECT_CFP_EQ(func((CFPT)(-3210.5678 - 9876.5432i)), + CFPT(-3210.5678 - 9876.5432i)); + + EXPECT_CFP_EQ(func((CFPT)(1234.4321 + 4321.1234i)), + CFPT(1234.4321 + 4321.1234i)); + EXPECT_CFP_EQ(func((CFPT)(-1234.4321 + 4321.1234i)), + CFPT(-1234.4321 + 4321.1234i)); + EXPECT_CFP_EQ(func((CFPT)(1234.4321 - 4321.1234i)), + CFPT(1234.4321 - 4321.1234i)); + EXPECT_CFP_EQ(func((CFPT)(-1234.4321 - 4321.1234i)), + CFPT(-1234.4321 - 4321.1234i)); + + EXPECT_CFP_EQ(func((CFPT)(6789.1234 + 8765.6789i)), + CFPT(6789.1234 + 8765.6789i)); + EXPECT_CFP_EQ(func((CFPT)(-6789.1234 + 8765.6789i)), + CFPT(-6789.1234 + 8765.6789i)); + EXPECT_CFP_EQ(func((CFPT)(6789.1234 - 8765.6789i)), + CFPT(6789.1234 - 8765.6789i)); + EXPECT_CFP_EQ(func((CFPT)(-6789.1234 - 8765.6789i)), + CFPT(-6789.1234 - 8765.6789i)); + } +}; + +#define LIST_CPROJ_TESTS(U, T, func) \ + using LlvmLibcCprojTest = CprojTest; \ + TEST_F(LlvmLibcCprojTest, SpecialNumbers) { testSpecialNumbers(&func); } \ + TEST_F(LlvmLibcCprojTest, RoundedNumbers) { testRoundedNumbers(&func); } + +#endif // LLVM_LIBC_TEST_SRC_COMPLEX_CPROJTEST_H diff --git a/libc/test/src/complex/cproj_test.cpp b/libc/test/src/complex/cproj_test.cpp new file mode 100644 index 0000000000000..83e5760f9ca82 --- /dev/null +++ b/libc/test/src/complex/cproj_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for cproj -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CprojTest.h" + +#include "src/complex/cproj.h" + +LIST_CPROJ_TESTS(_Complex double, double, LIBC_NAMESPACE::cproj) diff --git a/libc/test/src/complex/cprojf128_test.cpp b/libc/test/src/complex/cprojf128_test.cpp new file mode 100644 index 0000000000000..75708122260d6 --- /dev/null +++ b/libc/test/src/complex/cprojf128_test.cpp @@ -0,0 +1,17 @@ +//===-- Unittests for cprojf128 -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CprojTest.h" + +#include "src/complex/cprojf128.h" + +#if defined(LIBC_TYPES_HAS_CFLOAT128) + +LIST_CPROJ_TESTS(cfloat128, float128, LIBC_NAMESPACE::cprojf128) + +#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/test/src/complex/cprojf16_test.cpp b/libc/test/src/complex/cprojf16_test.cpp new file mode 100644 index 0000000000000..628cec0dc5d96 --- /dev/null +++ b/libc/test/src/complex/cprojf16_test.cpp @@ -0,0 +1,17 @@ +//===-- Unittests for cprojf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CprojTest.h" + +#include "src/complex/cprojf16.h" + +#if defined(LIBC_TYPES_HAS_CFLOAT16) + +LIST_CPROJ_TESTS(cfloat16, float16, LIBC_NAMESPACE::cprojf16) + +#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/test/src/complex/cprojf_test.cpp b/libc/test/src/complex/cprojf_test.cpp new file mode 100644 index 0000000000000..7123ed4e28d4b --- /dev/null +++ b/libc/test/src/complex/cprojf_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for cprojf ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CprojTest.h" + +#include "src/complex/cprojf.h" + +LIST_CPROJ_TESTS(_Complex float, float, LIBC_NAMESPACE::cprojf) diff --git a/libc/test/src/complex/cprojl_test.cpp b/libc/test/src/complex/cprojl_test.cpp new file mode 100644 index 0000000000000..0858bf460188d --- /dev/null +++ b/libc/test/src/complex/cprojl_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for cprojl ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CprojTest.h" + +#include "src/complex/cprojl.h" + +LIST_CPROJ_TESTS(_Complex long double, long double, LIBC_NAMESPACE::cprojl) From 2a0091fb4abb5f89198d7e9c039da01921e2b7ee Mon Sep 17 00:00:00 2001 From: Mark Danial <118996571+madanial0@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:43:50 -0500 Subject: [PATCH 19/35] [AIX] fix unsupported diff flag on AIX (-strip-trailing-cr) (#120276) https://github.com/llvm/llvm-project/pull/119666 adds the `--strip-trailing-cr` flag to diff, which is not supported on AIX. Switch to the Python implementation of diff instead. --- clang/test/Format/lit.local.cfg | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/test/Format/lit.local.cfg b/clang/test/Format/lit.local.cfg index 8acf02725d701..b060c79226cbd 100644 --- a/clang/test/Format/lit.local.cfg +++ b/clang/test/Format/lit.local.cfg @@ -1,3 +1,6 @@ +import platform +import lit.formats + # Suffixes supported by clang-format. config.suffixes = [ ".c", @@ -19,3 +22,8 @@ config.suffixes = [ ".td", ".test" ] + +# AIX 'diff' command doesn't support --strip-trailing-cr, but the internal +# python implementation does, so use that for cross platform compatibility +if platform.system() == "AIX": + config.test_format = lit.formats.ShTest() From 4ad0fdd1631eeae432714c03ede01a10dc00025d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 17 Dec 2024 20:44:31 +0000 Subject: [PATCH 20/35] [VPlan] Remove reverse() of predecessors from VPInstruction::generate. This was originally done to reduce the diff for the change. Remove it and update the remaining tests. NFC modulo reordering of incoming values. Clean up after https://github.com/llvm/llvm-project/pull/114292. 
--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 +- .../AArch64/conditional-branches-cost.ll | 10 +- .../AArch64/deterministic-type-shrinkage.ll | 2 +- .../AArch64/divs-with-scalable-vfs.ll | 6 +- .../AArch64/epilog-vectorization-factors.ll | 6 +- .../epilog-vectorization-widen-inductions.ll | 16 +-- .../AArch64/force-target-instruction-cost.ll | 8 +- .../AArch64/induction-costs-sve.ll | 32 ++--- .../LoopVectorize/AArch64/induction-costs.ll | 6 +- ...interleave-allocsize-not-equal-typesize.ll | 2 +- .../AArch64/interleaving-load-store.ll | 4 +- .../AArch64/interleaving-reduction.ll | 4 +- .../LoopVectorize/AArch64/intrinsiccost.ll | 12 +- .../AArch64/loop-vectorization-factors.ll | 10 +- .../AArch64/low_trip_count_predicates.ll | 6 +- .../AArch64/pr60831-sve-inv-store-crash.ll | 2 +- .../LoopVectorize/AArch64/store-costs-sve.ll | 6 +- .../sve-epilog-vect-inloop-reductions.ll | 4 +- .../AArch64/sve-epilog-vect-reductions.ll | 4 +- .../sve-epilog-vect-strict-reductions.ll | 4 +- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 16 +-- .../LoopVectorize/AArch64/sve-fneg.ll | 2 +- .../AArch64/sve-interleaved-accesses.ll | 6 +- .../LoopVectorize/AArch64/sve-multiexit.ll | 4 +- .../sve-runtime-check-size-based-threshold.ll | 2 +- .../AArch64/sve-vector-reverse.ll | 2 +- .../AArch64/sve2-histcnt-epilogue.ll | 2 +- .../AArch64/sve2-histcnt-too-many-deps.ll | 2 +- .../LoopVectorize/AArch64/sve2-histcnt.ll | 2 +- .../ARM/mve-gather-scatter-tailpred.ll | 2 +- .../ARM/mve-hoist-runtime-checks.ll | 2 +- .../LoopVectorize/ARM/mve-multiexit.ll | 4 +- .../LoopVectorize/PowerPC/exit-branch-cost.ll | 12 +- .../PowerPC/optimal-epilog-vectorization.ll | 12 +- .../RISCV/blocks-with-dead-instructions.ll | 2 +- .../LoopVectorize/RISCV/dead-ops-cost.ll | 4 +- .../LoopVectorize/RISCV/induction-costs.ll | 4 +- .../RISCV/masked_gather_scatter.ll | 4 +- .../LoopVectorize/RISCV/strided-accesses.ll | 20 +-- ...-force-tail-with-evl-bin-unary-ops-args.ll | 36 +++--- 
...ize-force-tail-with-evl-call-intrinsics.ll | 34 ++--- ...ize-force-tail-with-evl-cast-intrinsics.ll | 20 +-- ...-force-tail-with-evl-intermediate-store.ll | 16 +-- .../vf-will-not-generate-any-vector-insts.ll | 2 +- .../LoopVectorize/X86/conversion-cost.ll | 2 +- .../LoopVectorize/X86/cost-model.ll | 10 +- .../X86/divs-with-tail-folding.ll | 4 +- .../X86/drop-poison-generating-flags.ll | 2 +- .../X86/epilog-vectorization-inductions.ll | 6 +- .../LoopVectorize/X86/float-induction-x86.ll | 8 +- .../LoopVectorize/X86/gather_scatter.ll | 8 +- .../illegal-parallel-loop-uniform-write.ll | 2 +- .../LoopVectorize/X86/induction-costs.ll | 16 +-- .../LoopVectorize/X86/interleave-cost.ll | 6 +- .../LoopVectorize/X86/intrinsiccost.ll | 12 +- .../X86/invariant-load-gather.ll | 2 +- .../X86/invariant-store-vectorization.ll | 8 +- .../X86/limit-vf-by-tripcount.ll | 6 +- .../LoopVectorize/X86/masked-store-cost.ll | 2 +- .../LoopVectorize/X86/masked_load_store.ll | 42 +++--- .../LoopVectorize/X86/multi-exit-cost.ll | 4 +- .../Transforms/LoopVectorize/X86/pr23997.ll | 2 +- .../Transforms/LoopVectorize/X86/pr35432.ll | 4 +- .../Transforms/LoopVectorize/X86/pr36524.ll | 4 +- .../Transforms/LoopVectorize/X86/pr47437.ll | 2 +- .../Transforms/LoopVectorize/X86/pr54634.ll | 2 +- ...ond-optimization-epilogue-vectorization.ll | 2 +- .../Transforms/LoopVectorize/X86/pr72969.ll | 6 +- .../LoopVectorize/X86/scatter_crash.ll | 10 +- .../LoopVectorize/X86/strided_load_cost.ll | 8 +- .../LoopVectorize/X86/uniform_mem_op.ll | 2 +- .../X86/vect.omp.force.small-tc.ll | 2 +- .../X86/vectorize-force-tail-with-evl.ll | 2 +- .../LoopVectorize/dead_instructions.ll | 2 +- .../LoopVectorize/epilog-iv-select-cmp.ll | 8 +- .../epilog-vectorization-any-of-reductions.ll | 18 +-- .../epilog-vectorization-reductions.ll | 22 ++-- ...log-vectorization-trunc-induction-steps.ll | 2 +- .../first-order-recurrence-complex.ll | 8 +- llvm/test/Transforms/LoopVectorize/fpsat.ll | 10 +- 
.../LoopVectorize/if-conversion-nest.ll | 4 +- .../LoopVectorize/if-pred-non-void.ll | 6 +- .../Transforms/LoopVectorize/induction.ll | 120 +++++++++--------- .../LoopVectorize/interleaved-accesses-3.ll | 6 +- .../LoopVectorize/interleaved-accesses.ll | 6 +- .../invariant-store-vectorization-2.ll | 8 +- .../invariant-store-vectorization.ll | 12 +- .../LoopVectorize/load-deref-pred-align.ll | 4 +- .../multiple-strides-vectorization.ll | 4 +- ...o-fold-tail-by-masking-iv-external-uses.ll | 4 +- .../LoopVectorize/no_outside_user.ll | 52 ++++---- .../Transforms/LoopVectorize/opaque-ptr.ll | 8 +- .../optimal-epilog-vectorization-liveout.ll | 4 +- .../optimal-epilog-vectorization.ll | 22 ++-- llvm/test/Transforms/LoopVectorize/optsize.ll | 8 +- .../pointer-select-runtime-checks.ll | 10 +- .../pr30654-phiscev-sext-trunc.ll | 12 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr45259.ll | 2 +- ...pr47343-expander-lcssa-after-cfg-update.ll | 2 +- llvm/test/Transforms/LoopVectorize/pr50686.ll | 2 +- .../pr59319-loop-access-info-invalidation.ll | 4 +- .../LoopVectorize/reduction-align.ll | 4 +- .../LoopVectorize/reverse_induction.ll | 24 ++-- .../runtime-check-needed-but-empty.ll | 2 +- .../runtime-check-small-clamped-bounds.ll | 8 +- .../Transforms/LoopVectorize/runtime-check.ll | 4 +- ...ntime-checks-difference-simplifications.ll | 4 +- .../LoopVectorize/runtime-checks-hoist.ll | 24 ++-- .../scev-exit-phi-invalidation.ll | 4 +- .../LoopVectorize/scev-predicate-reasoning.ll | 10 +- .../LoopVectorize/select-cmp-multiuse.ll | 18 +-- .../LoopVectorize/single_early_exit.ll | 4 +- .../LoopVectorize/skeleton-lcssa-crash.ll | 2 +- .../version-stride-with-integer-casts.ll | 22 ++-- .../AArch64/indvars-vectorization.ll | 2 +- .../AArch64/matrix-extract-insert.ll | 22 ++-- .../X86/pr48844-br-to-switch-vectorization.ll | 2 +- .../test/Transforms/PhaseOrdering/X86/vdiv.ll | 2 +- 119 files changed, 538 insertions(+), 541 deletions(-) diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5122eb01fe7a8..7239ecefbde56 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -634,10 +634,7 @@ Value *VPInstruction::generate(VPTransformState &State) { State.CFG .VPBB2IRBB[cast(getParent()->getPredecessors()[0])]; NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); - // TODO: Predecessors are temporarily reversed to reduce test changes. - // Remove it and update remaining tests after functional change landed. - auto Predecessors = to_vector(predecessors(Builder.GetInsertBlock())); - for (auto *OtherPred : reverse(Predecessors)) { + for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { if (OtherPred == VPlanPred) continue; NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 6956030570636..867355952cafe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -369,7 +369,7 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -588,7 +588,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, 
ptr %D, pt ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]] ; DEFAULT: loop.header: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -715,7 +715,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -1301,7 +1301,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP_HEADER:%.*]] ; DEFAULT: loop.header: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] @@ -1523,7 +1523,7 @@ define void 
@test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP_HEADER:%.*]] ; PRED: loop.header: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll index 6d43c1e1c7d48..2ccc51117bbb0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -83,7 +83,7 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2da89120182c0..9e5c6e1527c55 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -52,7 +52,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -164,7 +164,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] @@ -291,7 +291,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index 4a2de58938043..898e515de0fe3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -85,7 +85,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -203,7 +203,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -321,7 +321,7 @@ define void @add_i32(ptr noalias 
nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 7645e1094e9ff..03de9acaf499b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -73,8 +73,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] 
] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -165,7 +165,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -258,7 +258,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK:%.*]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -344,8 +344,8 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[IND_END]] ; CHECK-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = 
phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -431,7 +431,7 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -509,7 +509,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 10000, 
[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index d5110c3fdd70f..cbf9bf08c2a20 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -116,12 +116,12 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[START]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], %[[ITER_CHECK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL7]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 
[[IV]], -1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 @@ -344,7 +344,7 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4bb67c890f3cf..8d449f447d598 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -146,7 +146,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -238,7 +238,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -318,7 +318,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[FOR_BODY:%.*]] ; PRED: for.body: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -407,8 +407,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -513,8 +513,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -605,8 +605,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -710,8 +710,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -787,8 +787,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -854,8 +854,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 4d070a4d02c90..3f55701f4f2a4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -263,7 +263,7 @@ define void @wide_truncated_iv(ptr %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 200, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 192, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 200, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 192, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
[[LOOP1]] ] @@ -417,8 +417,8 @@ define void @zext_iv_increment(ptr %dst, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll index 79d7ab84b3a0f..bd77f9779b680 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll @@ -54,7 +54,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index 3a20e4a0bf8c8..24ff9c67f80e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -103,7 +103,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; INTERLEAVE-4: vec.epilog.scalar.ph: -; INTERLEAVE-4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; INTERLEAVE-4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; INTERLEAVE-4-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE-4: loop: ; INTERLEAVE-4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -198,7 +198,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC8]] ; INTERLEAVE-2-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; INTERLEAVE-2: vec.epilog.scalar.ph: -; INTERLEAVE-2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; INTERLEAVE-2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; INTERLEAVE-2-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE-2: loop: ; INTERLEAVE-2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index 07aa57d329c7d..edb58e2d8f12f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -79,8 +79,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[N]], [[N_VEC11]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; INTERLEAVE-4: vec.epilog.scalar.ph: -; INTERLEAVE-4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; INTERLEAVE-4-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i32 [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; INTERLEAVE-4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; INTERLEAVE-4-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i32 [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ] ; INTERLEAVE-4-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE-4: loop: ; INTERLEAVE-4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index 6d37dd9a4c209..393ee8d30433b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -87,9 +87,9 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N24:%.*]] = icmp eq i64 [[N_VEC6]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N24]], label 
[[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[IND_END9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi ptr [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ], [ [[IND_END8]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[IND_END9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PSRC]], [[ITER_CHECK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi ptr [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PDST]], [[ITER_CHECK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -202,9 +202,9 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N21:%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N21]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi ptr [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ], [ [[IND_END7]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi ptr [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PSRC]], [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PDST]], [[ITER_CHECK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 2a365c91cab54..7f02792d4fe94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -63,7 +63,7 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -166,7 +166,7 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; 
CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -338,7 +338,7 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -539,7 +539,7 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label 
[[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -691,7 +691,7 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 7a7eb71ea2f23..528e202b4997f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -127,7 +127,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS1: [[WHILE_BODY]]: ; 
CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -235,7 +235,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS2: [[WHILE_BODY]]: ; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -453,7 +453,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[WHILE_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[WHILE_PREHEADER]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[WHILE_PREHEADER]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 72a1bb2c00e54..d95cb6be0e858 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -181,7 +181,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 992, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 0bcdbed607f0d..ac054f569e11b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -70,7 +70,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-NEXT: [[CMP_N4:%.*]] = icmp eq i64 101, [[N_VEC3]] ; DEFAULT-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -219,7 +219,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 
[[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: ; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -267,7 +267,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: ; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index 754f97c21608a..6dda3de0d7b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -71,8 +71,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], 
[[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index 9d06363846254..c2b32d87c7a16 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -71,8 +71,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP28]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP28]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 5, [[ITER_CHECK]] ], [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: 
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index af678d7a20f42..5c90ee3a1bc88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -66,8 +66,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 36af7eb57868b..7d058a6ef25db 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -82,7 +82,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; 
CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -145,7 +145,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: ; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -239,7 +239,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] 
= phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -302,7 +302,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: ; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -397,8 +397,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 10000, [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -464,8 +464,8 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; 
CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-VF8-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[LOOP:%.*]] ; CHECK-VF8: loop: ; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 08d0fb77e456d..a8122849c203c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -60,7 +60,7 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 28c9c5398a7a0..185114fd935cc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1492,9 +1492,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP33:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 6e01a5232d1a1..c20be943a2ccf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -59,7 +59,7 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; 
CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] @@ -150,7 +150,7 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index feb27caf305a2..1373266497c61 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -90,7 +90,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 
0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 9567123ab8ae3..d55ef855604bd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -153,7 +153,7 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ], [ [[N]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[VECTOR_MEMCHECK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll index 84fc963833cf2..c74ceecf7cfe6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll @@ -66,7 +66,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N_MOD_VF2]], 0 ; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_EXIT]], label [[SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY1]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll index c430e72cea703..d4c144ebe5dfb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll @@ -99,7 +99,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; NORMAL_DEP_LIMIT-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; NORMAL_DEP_LIMIT: scalar.ph: -; NORMAL_DEP_LIMIT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; NORMAL_DEP_LIMIT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY1:%.*]] ] ; NORMAL_DEP_LIMIT-NEXT: br label [[FOR_BODY1:%.*]] ; NORMAL_DEP_LIMIT: for.body: ; NORMAL_DEP_LIMIT-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 728d7d1995e08..6df1241f91973 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -779,7 +779,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = 
phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index fda9ef2cf6c2f..b629dfd4fce9f 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -353,7 +353,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll index c18592eec9f84..845cd08cbae19 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll @@ -82,7 +82,7 @@ define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture nou ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], 
[ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[J_021_US:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_US:%.*]], [[INNER_LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll index 789a97c052a96..cc2fbb1b0df79 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll @@ -42,7 +42,7 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] @@ -116,7 +116,7 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll 
b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 5f151c260c8fc..79ced9a483ef7 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -144,14 +144,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_N36:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]] ; CHECK-NEXT: br i1 [[CMP_N36]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL28:%.*]] = phi ptr [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX29:%.*]] = phi i64 [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL35:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL36:%.*]] = phi ptr [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], %[[ITER_CHECK]] ], [ [[IND_END27]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX37:%.*]] = phi i64 [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL26]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL28]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL35]], 
%[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX37]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL36]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64 ; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 95a77fb6be354..32d62befe9fc5 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -120,7 +120,7 @@ define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 ; VF-TWO-CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC18]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -252,7 +252,7 @@ define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 ; VF-FOUR-CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC18]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; 
VF-FOUR-CHECK: vec.epilog.scalar.ph: -; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-FOUR-CHECK: for.body: ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -438,8 +438,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; VF-TWO-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -597,8 +597,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 
signext %n) ; VF-FOUR-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-FOUR-CHECK: vec.epilog.scalar.ph: -; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ] ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-FOUR-CHECK: for.body: ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 00fc704a92b05..11efac951082a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -874,7 +874,7 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index ec97105bf9da6..16c23cd777b65 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -134,7 +134,7 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -265,7 +265,7 @@ define i32 @cost_of_exit_branch_and_cond_insts(ptr %a, ptr %b, i1 %c, i16 %x) #0 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 
dfa72ce28c2e2..8131c7bfd752d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -91,8 +91,8 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[ENTRY]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[ENTRY]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ], [ [[X_I64]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ], [ [[X_I32]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 59aa9b23cf66b..2c19aab81251a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -69,7 +69,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV32-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; RV32: scalar.ph: -; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; RV32-NEXT: br label [[FOR_BODY:%.*]] ; RV32: for.body: ; RV32-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -148,7 +148,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV64-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; RV64: scalar.ph: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; RV64-NEXT: br label [[FOR_BODY:%.*]] ; RV64: for.body: ; RV64-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 4154ae4bb4b53..8395ffd58db90 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -263,7 +263,7 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: -; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -347,8 +347,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label 
[[SCALAR_PH]] ; NOSTRIDED: scalar.ph: -; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -481,7 +481,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: -; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -555,7 +555,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; STRIDED-NEXT: br 
label [[LOOP:%.*]] ; STRIDED: loop: ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -625,8 +625,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: -; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -778,9 +778,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ], [ [[P]], [[VECTOR_MEMCHECK]] ] -; STRIDED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[ENTRY]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[VECTOR_MEMCHECK]] ], [ [[P]], [[ENTRY]] 
] +; STRIDED-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ], [ [[P2]], [[ENTRY]] ] ; STRIDED-NEXT: br label [[LOOP:%.*]] ; STRIDED: loop: ; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index 40ae96f378b6c..38fa8d3d9d9c5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -54,7 +54,7 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -148,7 +148,7 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ 
[[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -242,7 +242,7 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -336,7 +336,7 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -430,7 +430,7 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -524,7 +524,7 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -618,7 +618,7 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -712,7 +712,7 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], 
%[[SCALAR_PH]] ] @@ -806,7 +806,7 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -900,7 +900,7 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -994,7 +994,7 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ 
-1088,7 +1088,7 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1182,7 +1182,7 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1279,7 +1279,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1374,7 +1374,7 @@ 
define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1469,7 +1469,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1564,7 +1564,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -1712,7 +1712,7 @@ define void 
@test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll index 66a5e086a0117..11cf832c8abbf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll @@ -67,7 +67,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -178,11 +178,11 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP19]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -293,11 +293,11 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -408,11 +408,11 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; 
IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -515,11 +515,11 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -615,11 +615,11 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] ; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -718,11 +718,11 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -827,11 +827,11 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: 
[[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -934,11 +934,11 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll 
index 4557e95f1e1b6..78b9e19fb3966 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cast-intrinsics.ll @@ -59,7 +59,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -159,7 +159,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -259,7 +259,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -359,7 +359,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -459,7 +459,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -559,7 +559,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -659,7 +659,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; 
IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -759,7 +759,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -859,7 +859,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -959,7 +959,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; IF-EVL: [[SCALAR_PH]]: -; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
%[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; IF-EVL-NEXT: br label %[[LOOP:.*]] ; IF-EVL: [[LOOP]]: ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll index 733c05fd9259c..d4881bc50229c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-intermediate-store.ll @@ -71,8 +71,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: store i32 [[TMP23]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY1]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY1]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY1:%.*]] ; IF-EVL-OUTLOOP: for.body: ; IF-EVL-OUTLOOP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] @@ -134,8 +134,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr 
%addr) ; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; IF-EVL-INLOOP: for.body: ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -191,8 +191,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; NO-VP-OUTLOOP: scalar.ph: -; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ] ; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP-OUTLOOP: for.body: ; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -247,8 +247,8 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; NO-VP-INLOOP: scalar.ph: -; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ], [ [[START]], [[VECTOR_MEMCHECK]] ] +; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ] ; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP-INLOOP: for.body: ; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll index e7fdfbcf76caa..4efc231c92a4d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll @@ -33,7 +33,7 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[TMP2:%.*]] = phi 
i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP3:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index a1cc59909adbe..f3190369ae2a2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -68,7 +68,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, [[ITER_CHECK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index af1e78cc65bbe..bfff5c94d727f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -275,8 +275,8 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N14]], label [[LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi float [ [[TMP157]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 
[[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi float [ [[TMP157]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR]] ] @@ -388,8 +388,8 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ] @@ -833,7 +833,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC39]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC39]], 
[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll index c861aa8172b9b..f14422e0a6069 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll @@ -46,7 +46,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -162,7 +162,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll 
b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index eaea69735fcbe..0686395567cc2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -377,7 +377,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[BODY:%.*]] ; CHECK: body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 4400b6ba568d2..5fb7df2c74d93 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -92,7 +92,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -225,8 +225,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 1fab786f7c74a..ab0b45473a623 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -84,8 +84,8 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[N_VEC3]], [[ZEXT]] ; AUTO_VEC-NEXT: br i1 [[CMP_N14]], label [[FOR_END]], label [[FOR_BODY]] ; AUTO_VEC: for.body: -; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], 
[[FOR_BODY]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END8]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 @@ -468,8 +468,8 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC: for.cond.cleanup: ; AUTO_VEC-NEXT: ret void ; AUTO_VEC: for.body: -; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[ADD3:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[ADD3:%.*]], [[FOR_BODY]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[P]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 9adc64a6020e7..b6bccab5c2e4a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -705,8 +705,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]] ; AVX512-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ] -; AVX512-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[ITER_CHECK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[ITER_CHECK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] @@ -801,8 +801,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; FVW2-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[SCALAR_PH]] ; FVW2: scalar.ph: -; FVW2-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ] -; 
FVW2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[DEST]], [[FOR_BODY_LR_PH]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ] +; FVW2-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ] +; FVW2-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[FOR_BODY_LR_PH]] ] ; FVW2-NEXT: br label [[FOR_BODY:%.*]] ; FVW2: for.body: ; FVW2-NEXT: [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 5ba559af077ca..304105fd9925a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -87,7 +87,7 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ] ; CHECK-NEXT: br label [[FOR_BODY3_US]] ; CHECK: for.end15.loopexit: ; CHECK-NEXT: br label [[FOR_END15]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 3d0fe42635fe5..3b550449006f3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -74,7 +74,7 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, 
i64 %N) #0 { ; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC9]] ; CHECK-NEXT: br i1 [[CMP_N20]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ] @@ -144,7 +144,7 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -347,11 +347,11 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ 
[[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[VECTOR_MEMCHECK]] ], [ 100, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[VECTOR_MEMCHECK]] ], [ [[A]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[VECTOR_MEMCHECK]] ], [ [[B]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[VECTOR_MEMCHECK]] ], [ [[B]], [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ] @@ -541,7 +541,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index e5dd723be0c20..5c9375eb1d17f 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -111,7 +111,7 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -274,7 +274,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -501,7 +501,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; 
CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index b11a58c4c0ae6..b2772648b5ee1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -93,9 +93,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N26:%.*]] = icmp eq i64 [[N_VEC8]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N26]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PSRC]], [[ITER_CHECK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL16:%.*]] = phi ptr [ [[IND_END14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PDST]], [[ITER_CHECK]] ], [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -212,9 +212,9 @@ define void @fshl(ptr 
nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[N_VEC7]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N23]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL12:%.*]] = phi ptr [ [[IND_END10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL12:%.*]] = phi ptr [ [[IND_END10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PSRC]], [[ITER_CHECK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PDST]], [[ITER_CHECK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index d2d33cbda8062..e8e0608329d89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -70,7 +70,7 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[FOR_END]], label 
[[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 41841f27a0c06..adfffccb6bcac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -82,8 +82,8 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC13]] ; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX19:%.*]] = phi i32 [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX19:%.*]] = phi i32 [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP10]], [[VEC_EPILOG_ITER_CHECK]] ] ; 
CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -210,7 +210,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC9]] ; CHECK-NEXT: br i1 [[CMP_N23]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -335,7 +335,7 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: [[CMP_N28:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC17]] ; CHECK-NEXT: br i1 [[CMP_N28]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 
8688b246c60f4..50414cc29312c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -99,7 +99,7 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 18, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 18, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] @@ -172,7 +172,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 18, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 18, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] @@ -257,7 +257,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 
16, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index 7f0b6b2f9b4d7..f0a1e5c47d06a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -84,7 +84,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 739833653cb16..7e42ffd0f44c3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -53,7 +53,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1: middle.block: ; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX1: scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX1-NEXT: br label [[FOR_BODY:%.*]] ; 
AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -160,7 +160,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: vec.epilog.middle.block: ; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; AVX2: vec.epilog.scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX2-NEXT: br label [[FOR_BODY1:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -267,7 +267,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -354,7 +354,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX1: middle.block: ; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX1: scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX1-NEXT: br label [[FOR_BODY:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -461,7 +461,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2: vec.epilog.middle.block: ; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; AVX2: vec.epilog.scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX2-NEXT: br label [[FOR_BODY1:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -568,7 +568,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -665,7 +665,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, 
ptr nocapture rea ; AVX1: middle.block: ; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX1: scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX1-NEXT: br label [[FOR_BODY:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -778,7 +778,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: vec.epilog.middle.block: ; AVX2-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; AVX2: vec.epilog.scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX2-NEXT: br label [[FOR_BODY1:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -891,7 +891,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; 
AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1019,7 +1019,7 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1: middle.block: ; AVX1-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX1: scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX1-NEXT: br label [[FOR_BODY:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1107,7 +1107,7 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: middle.block: ; AVX2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX2: scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX2-NEXT: br label [[FOR_BODY:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1222,7 +1222,7 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: br i1 true, label [[FOR_END]], label [[SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 9984, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 9984, 
[[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY1:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1347,7 +1347,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: middle.block: ; AVX512-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX512: scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1527,7 +1527,7 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2: middle.block: ; AVX2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX2: scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[VECTOR_MEMCHECK]] ], [ 4095, [[ENTRY:%.*]] ] ; AVX2-NEXT: br label [[FOR_BODY:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1639,7 +1639,7 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512: middle.block: ; AVX512-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; AVX512: scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[VECTOR_MEMCHECK]] ], [ 4095, [[ENTRY:%.*]] ] ; 
AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1806,7 +1806,7 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] ; AVX1-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX1: vec.epilog.scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX1-NEXT: br label [[FOR_BODY1:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -1942,7 +1942,7 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] ; AVX2-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX2: vec.epilog.scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX2-NEXT: br label [[FOR_BODY1:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -2078,7 +2078,7 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], 
[[N_VEC9]] ; AVX512-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY1:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -2259,7 +2259,7 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX1-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] ; AVX1-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX1: vec.epilog.scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX1-NEXT: br label [[FOR_BODY1:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -2395,7 +2395,7 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX2-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] ; AVX2-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX2: vec.epilog.scalar.ph: -; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
[[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX2-NEXT: br label [[FOR_BODY1:%.*]] ; AVX2: for.body: ; AVX2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -2531,7 +2531,7 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in ; AVX512-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC9]] ; AVX512-NEXT: br i1 [[CMP_N14]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX512: vec.epilog.scalar.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY1:%.*]] ; AVX512: for.body: ; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll index 4e768074019d3..7b29d0ef7cbb5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll @@ -57,8 +57,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64 ; CHECK-NEXT: [[TMP31:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP29]]) ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
[[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[TMP31]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 55c9930183b80..f7a3a1245c286 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -75,7 +75,7 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT4:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 1845243d1278e..1194e82325ef7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -83,8 +83,8 @@ define i32 @main(ptr %ptr) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index ee8374f952c7a..90b86bb3ffb07 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -37,8 +37,8 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[VECTOR_MEMCHECK]] ], [ 2, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll index 7b1c7ae94ff41..d8ec92124682a 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll @@ -307,7 +307,7 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon ; AVX1-NEXT: [[CMP_N34:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC25]] ; AVX1-NEXT: br i1 [[CMP_N34]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; AVX1: vec.epilog.scalar.ph: -; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC25]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; AVX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC25]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; AVX1-NEXT: br label [[FOR_BODY1:%.*]] ; AVX1: for.body: ; AVX1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY1]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 23a6a1286a0f0..1cee80f88ec62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -92,7 +92,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[L44]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[L27:%.*]] ; CHECK: L26: ; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], [[L27]] ] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll index 5476ff504edb3..c317e877ee8a5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr56319-vector-exit-cond-optimization-epilogue-vectorization.ll @@ -49,7 +49,7 @@ define void @pr56319(ptr noalias %src, ptr noalias %dst) { ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 36, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 32, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 36, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 32, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index f0dfcf3eada72..5a0aec967ccd9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -86,9 +86,9 @@ define void @test(ptr %p) { ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] -; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY]] ] ; VEC-NEXT: br label [[FOR_BODY:%.*]] ; VEC: for.body: ; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index f54c0a14cf63c..236ed30be4f13 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -109,8 +109,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[CMP_N23]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 8, [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: iter.check27: ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 @@ -205,13 +205,13 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; 
CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY58]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK46:%.*]], label [[VEC_EPILOG_VECTOR_BODY58]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: vec.epilog.middle.block46: ; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]] ; CHECK: vec.epilog.scalar.ph47: -; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] -; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK46]] ], [ 8, [[ITER_CHECK27]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ] +; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK46]] ], [ 0, [[ITER_CHECK27]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] ; CHECK: for.body.us: ; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index ad8f1fb3ccd21..d316befb9548d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -222,8 +222,8 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; 
CHECK-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] @@ -454,8 +454,8 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) ; MAX-BW-NEXT: br i1 true, label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] ; MAX-BW: vec.epilog.scalar.ph: -; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 96, [[VEC_EPILOG_ITER_CHECK]] ] +; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP149]], [[VEC_EPILOG_ITER_CHECK]] ] ; MAX-BW-NEXT: br label [[FOR_BODY1:%.*]] ; MAX-BW: 
for.cond.cleanup: ; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY1]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ], [ [[TMP170]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index 45594b0335336..26a9e179e3c53 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -299,7 +299,7 @@ define void @uniform_copy(ptr %A, ptr %B) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 90ba702ed232e..eb52420949f8d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -82,7 +82,7 @@ define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index e37eae4c1f390..a3b066ed82216 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -141,7 +141,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] ; NO-VP-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_SCALAR_PH]] ; NO-VP: vec.epilog.scalar.ph: -; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; NO-VP-NEXT: br label [[FOR_BODY:%.*]] ; NO-VP: for.body: ; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index e1fd07bf590c4..d19fe8416200e 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -165,7 +165,7 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ] ; 
CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 6106cc0046326..052b4a10e9c8d 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -70,8 +70,8 @@ define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 3, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 3, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -174,8 +174,8 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], 
%[[VEC_EPILOG_ITER_CHECK]] ], [ 2, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i64 [ [[RDX_SELECT11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 2, %[[ITER_CHECK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 47a6312ab2f29..c159ec868c357 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -67,8 +67,8 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] 
] @@ -166,8 +166,8 @@ define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %st ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -274,9 +274,9 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], 
[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -426,8 +426,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i1 [ [[RDX_SELECT22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ true, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK:%.*]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i1 [ [[RDX_SELECT22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ true, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RED:%.*]] = phi i1 [ [[BC_MERGE_RDX23]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index f12aab5079ed9..c6237170eebb1 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ 
-58,8 +58,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i64 [ [[TMP12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i64 [ [[TMP12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 5, [[ITER_CHECK]] ], [ [[TMP5]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -151,8 +151,8 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: 
for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -245,8 +245,8 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP23]] to i32 ; CHECK-NEXT: br i1 true, label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX5:%.*]] = phi i32 [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX5:%.*]] = phi i32 [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP11]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] @@ -349,9 +349,9 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) { ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N11]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi float [ [[TMP16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.500000e+01, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi float [ [[TMP17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+01, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, 
[[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi float [ [[TMP16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi float [ [[TMP17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -452,8 +452,8 @@ define i32 @reduction_phi_start_val(ptr %A, i64 %N) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_COND]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i32 [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START_SUM]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i32 [ [[TMP13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll index 1854acf0ec2ba..06ca7b197279c 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll +++ 
b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll @@ -63,7 +63,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) { ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index bd3688d852d0f..fe16e8ce6f97b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; 
CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -935,8 +935,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/fpsat.ll b/llvm/test/Transforms/LoopVectorize/fpsat.ll index 8d55b6349995b..7df0a34c35b85 100644 --- a/llvm/test/Transforms/LoopVectorize/fpsat.ll +++ b/llvm/test/Transforms/LoopVectorize/fpsat.ll @@ -37,7 +37,7 @@ define void @signed(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] 
@@ -52,7 +52,7 @@ define void @signed(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: %cmp6 = icmp sgt i32 %n, 0 @@ -108,12 +108,12 @@ define void @unsigned(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -128,7 +128,7 @@ define void @unsigned(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label 
[[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: %cmp6 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 492eb091175e2..ff5d45d3f7f0f 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -46,7 +46,7 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -160,7 +160,7 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git 
a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index b8597b85f79be..e733a4a381e50 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -273,7 +273,7 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud, ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: ret void @@ -484,7 +484,7 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: ret void @@ -682,7 +682,7 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {; ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label 
[[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index e0b1c5f2d861b..28c1c2afbe081 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -302,7 +302,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -362,7 +362,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; IND-NEXT: br label [[FOR_BODY:%.*]] ; IND: for.body: ; IND-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -429,7 +429,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
[[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: ; UNROLL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -500,7 +500,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -567,7 +567,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] ; INTERLEAVE: for.body: ; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1593,7 +1593,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 
[[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1656,7 +1656,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; IND-NEXT: br label [[FOR_BODY:%.*]] ; IND: for.body: ; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1733,7 +1733,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1812,7 +1812,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; 
UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -1900,7 +1900,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: br label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] ; INTERLEAVE: for.body: ; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3472,9 +3472,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], 
[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3537,9 +3537,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3606,9 +3606,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 
[[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3680,9 +3680,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] +; 
UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3748,9 +3748,9 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3852,9 +3852,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], 
[ [[T]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3920,9 +3920,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, 
[[LOOP_PREHEADER]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -3992,9 +3992,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4069,9 +4069,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ 
[[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4140,9 +4140,9 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ], [ [[T]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] ; INTERLEAVE-NEXT: br 
label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4423,7 +4423,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4461,7 +4461,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; IND-NEXT: br label [[FOR_BODY:%.*]] ; IND: for.body: ; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4504,7 +4504,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4553,7 +4553,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -4594,7 +4594,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] ; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] ; INTERLEAVE: for.body: ; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -6236,9 +6236,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6307,9 +6307,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, 
[[ENTRY]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6382,9 +6382,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6463,9 +6463,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = 
phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6538,9 +6538,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] diff 
--git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll index 7bbf750f1702b..e101a61ba4148 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll @@ -67,8 +67,8 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] @@ -76,7 +76,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt ; CHECK-NEXT: ret void ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll 
b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index e3f9126f22607..4d0f6e67dfa07 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1508,9 +1508,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll index f5c0d8bb85836..bc1c1bf04a37f 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll @@ -55,7 +55,7 @@ define void @inv_val_store_to_inv_address_conditional_diff_values_ic(ptr %a, i64 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -153,7 +153,7 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -241,8 +241,8 @@ define i32 @variant_val_store_to_inv_address(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: 
[[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 689d00e1d0186..88be9fa1c8689 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -50,8 +50,8 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -122,7 +122,7 @@ define void @inv_val_store_to_inv_address(ptr %a, i64 %n, ptr %b) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -224,7 +224,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -401,8 +401,8 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[FOR_BODY3]] ] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 956b70fe92c50..0f4e327891899 100644 --- 
a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -265,8 +265,8 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll index a0cd3c64f2d77..851db7c44d51e 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll @@ -84,7 +84,7 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[Z]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOTOUTER]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTOUTER_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[DOTOUTER_PREHEADER]] ] ; CHECK-NEXT: br label [[DOTINNER:%.*]] ; CHECK: .exit: ; CHECK-NEXT: ret void @@ -151,7 +151,7 @@ define 
void @Test(ptr nocapture %obj, i64 %z) #0 { ; CHECK-HOIST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[Z]], [[N_VEC]] ; CHECK-HOIST-NEXT: br i1 [[CMP_N]], label [[DOTOUTER]], label [[SCALAR_PH]] ; CHECK-HOIST: scalar.ph: -; CHECK-HOIST-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTOUTER_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-HOIST-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[DOTOUTER_PREHEADER]] ] ; CHECK-HOIST-NEXT: br label [[DOTINNER:%.*]] ; CHECK-HOIST: .exit: ; CHECK-HOIST-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index 7a3f9df6ec282..e0f57d5233304 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -54,8 +54,8 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[CONV:%.*]] = phi i64 [ [[CONV2:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll 
index c45d05e21a138..713b8f8d97951 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,7 +23,7 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -44,12 +44,12 @@ define i32 @test1() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -96,7 +96,7 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -117,12 +117,12 @@ define i32 @test2() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -196,12 +196,12 @@ define i32 @test3(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; 
CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -258,7 +258,7 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -279,12 +279,12 @@ define i32 @test4(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; 
CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label %[[BB10:.*]] ; CHECK: [[BB10]]: @@ -520,7 +520,7 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I1:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -542,12 +542,12 @@ define i8 @outside_user_non_phi() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I1]] +; CHECK: [[_LR_PH_I1]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I1]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP8]], 10 ; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16]], label 
%[[BB10:.*]] ; CHECK: [[BB10]]: @@ -687,7 +687,7 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] ; CHECK: [[_LR_PH_I]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[OFFSET_IDX]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] ; CHECK: [[_LR_PH_I1:.*:]] ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] diff --git a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll index f62c3c7f42ec4..b001b6fa56208 100644 --- a/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ b/llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -64,8 +64,8 @@ define void @test_ptr_iv_no_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_SCEVCHECK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ], [ [[P1_START]], [[VECTOR_SCEVCHECK]] ], [ [[P1_START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END8]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ], [ 
[[P2_START]], [[VECTOR_SCEVCHECK]] ], [ [[P2_START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ] @@ -154,8 +154,8 @@ define void @test_ptr_iv_with_inbounds(ptr %p1.start, ptr %p2.start, ptr %p1.end ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[ENTRY:%.*]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END6]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[ENTRY]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P1_START]], [[VECTOR_MEMCHECK]] ], [ [[P1_START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[IND_END6]], [[MIDDLE_BLOCK]] ], [ [[P2_START]], [[VECTOR_MEMCHECK]] ], [ [[P2_START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll index 4a8fda99ef486..06ac6e75cd74b 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-liveout.ll @@ -64,13 +64,13 @@ define signext i32 @f1(ptr noalias %A, ptr noalias %B, i32 signext %n) { ; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD8]] ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] -; VF-TWO-CHECK-NEXT: br i1 [[TMP14]], label 
[[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; VF-TWO-CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF-TWO-CHECK: vec.epilog.middle.block: ; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1 ; VF-TWO-CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; VF-TWO-CHECK: vec.epilog.scalar.ph: -; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] ; VF-TWO-CHECK: for.body: ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 9f5e5f3980e6f..70199fa1e0797 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -76,7 +76,7 @@ define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 
0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -234,8 +234,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -363,7 +363,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, 
[[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -422,7 +422,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: for.body: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -528,8 +528,8 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END5]], 
[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[INNER]] ] @@ -618,8 +618,8 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[ITER_CHECK]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[INNER:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: inner: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[INNER]] ] @@ -720,7 +720,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], 
[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -790,7 +790,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]] ; CHECK-PROFITABLE-BY-DEFAULT: loop: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 738f265b89d65..03812f4acb1eb 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -489,7 +489,7 @@ define void @pr43371_pgso() !prof !14 { ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: -; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] ; NPGSO: [[FOR_COND_CLEANUP28]]: ; NPGSO-NEXT: unreachable @@ -854,7 +854,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; 
CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -888,7 +888,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; PGSO: [[SCALAR_PH]]: -; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; PGSO-NEXT: br label %[[FOR_BODY:.*]] ; PGSO: [[FOR_BODY]]: ; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -922,7 +922,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: -; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] ; NPGSO-NEXT: br label %[[FOR_BODY:.*]] ; NPGSO: [[FOR_BODY]]: ; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll index e652d86944c4d..00daf7d34483c 100644 --- 
a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll @@ -42,7 +42,7 @@ define void @test1_select_invariant(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -121,7 +121,7 @@ define void @test_loop_dependent_select1(ptr %src.1, ptr %src.2, ptr %dst, i1 %c ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -211,7 +211,7 @@ define void @test_loop_dependent_select2(ptr %src.1, ptr %src.2, ptr %dst, i8 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; 
CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -296,7 +296,7 @@ define void @test_loop_dependent_select_first_ptr_noundef(ptr noundef %src.1, pt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -381,7 +381,7 @@ define void @test_loop_dependent_select_second_ptr_noundef(ptr %src.1, ptr nound ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index 0cda697e0337a..2741b39693579 100644 --- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -92,8 +92,8 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], 
label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -215,8 +215,8 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -410,8 +410,8 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 
[[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 9f8f618f09585..ed7762fbc2ba3 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -71,7 +71,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ], [ [[START]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LAND_END:%.*]] ] @@ -156,7 +156,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ], [ [[START]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LAND_END:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll index 6baed089fb6b6..9ae8f69b50a90 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -58,7 +58,7 @@ define i8 @widget(ptr %arr, i8 %t9) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[FOR_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll index 6aaa44344ae46..3d85a7289637f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll +++ b/llvm/test/Transforms/LoopVectorize/pr47343-expander-lcssa-after-cfg-update.ll @@ -54,7 +54,7 @@ define void @f() { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, 
[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 500, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll index 6ce491e53c256..4d8562247871a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr50686.ll +++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll @@ -40,7 +40,7 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[FOR_END17:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND5:%.*]] ; CHECK: for.cond5: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND5]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll index d8b6a4e758851..3dafe8270dc3f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll @@ -63,7 +63,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[LOOP_3_LR_PH:%.*]], label [[SCALAR_PH5]] ; CHECK: scalar.ph5: -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK4]] ], [ 0, [[LOOP_2_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = 
phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK4]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[LOOP_2_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.3.lr.ph: ; CHECK-NEXT: [[IDXPROM_I_I61:%.*]] = and i64 [[IV761_LCSSA]], 1 @@ -93,7 +93,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]] ; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH21]] ; CHECK: scalar.ph21: -; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK20]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK14]] ] +; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK20]] ], [ 0, [[VECTOR_MEMCHECK14]] ], [ 0, [[LOOP_3_LR_PH]] ] ; CHECK-NEXT: br label [[LOOP_3:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV846:%.*]] = phi i64 [ [[IV_NEXT85:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL13]], [[SCALAR_PH5]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-align.ll b/llvm/test/Transforms/LoopVectorize/reduction-align.ll index 8eef1ed90c0c9..69e3e07ed3139 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-align.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-align.ll @@ -40,8 +40,8 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[HEIGHT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, 
[[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 3b6852ba59505..5028dc5355c50 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -41,12 +41,12 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]] @@ -114,12 +114,12 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[ENTRY]] ] -; 
CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]] @@ -196,13 +196,13 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[ENTRY]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ], [ [[STARTVAL]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, 
%[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[ADD_I7:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ADD_I]] = add i16 [[ADD_I7]], -1 ; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]] @@ -285,11 +285,11 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] ; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[INC]] to i32 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] @@ -354,11 +354,11 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, 
%[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ -127, %[[MIDDLE_BLOCK]] ], [ -127, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ -127, %[[MIDDLE_BLOCK]] ], [ -127, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK: [[WHILE_BODY]]: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] +; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] ; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[INC]] to i32 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index 7b2af60fcfd23..e14c547d96ad3 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -31,7 +31,7 @@ define void @test(ptr %A, i32 %x) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index 
bb515cd583e5b..a3bf5c76a20a6 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -47,7 +47,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -120,7 +120,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -201,7 +201,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -384,7 +384,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 5c817ea313183..e0e80271c0d26 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -42,7 +42,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]], !dbg [[DBG9]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]], !dbg [[DBG9]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ], !dbg [[DBG9]] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], !dbg [[DBG9]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG9]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] 
], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], !dbg [[DBG9]] @@ -147,7 +147,7 @@ define void @test_runtime_check(ptr %a, float %b, i64 %offset, i64 %offset2, i64 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll index d1324314eb953..038b482209592 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll @@ -115,7 +115,7 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -259,7 +259,7 @@ define void @check_creation_order(ptr %a, ptr %b, i32 %m) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ 31996, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 31996, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index da8e2add18a67..468b3ca337d7b 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -83,7 +83,7 @@ define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture nou ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -205,7 +205,7 @@ define void @full_checks(ptr nocapture noundef %dst, ptr nocapture noundef reado ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: 
[[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -337,7 +337,7 @@ define void @full_checks_diff_strides(ptr nocapture noundef %dst, ptr nocapture ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -452,7 +452,7 @@ define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %ds ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -566,7 +566,7 @@ define void @full_checks_src_start_invariant(ptr nocapture noundef %dst, ptr noc ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -711,7 +711,7 @@ define void @triple_nested_loop_mixed_access(ptr nocapture noundef %dst, ptr noc ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_END]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -869,7 +869,7 @@ define void @uncomputable_outer_tc(ptr nocapture noundef %dst, ptr nocapture nou ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ] @@ -1038,7 +1038,7 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP15]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], 
[[MIDDLE_BLOCK]] ], [ [[TMP0]], [[OUTER_LOOP]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ] @@ -1200,7 +1200,7 @@ define void @decreasing_outer_iv(ptr nocapture noundef %dst, ptr nocapture nound ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ] @@ -1355,7 +1355,7 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_LOOP_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER_LOOP]] ] @@ -1470,7 +1470,7 @@ define void 
@nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[INNER_EXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_LOOP]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[OUTER_LOOP]] ] ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_INNER:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_INNER_NEXT:%.*]], [[INNER_LOOP]] ] @@ -1560,7 +1560,7 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[OUTER_LATCH]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[OUTER_HEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[OUTER_HEADER]] ] ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index 7562a5873036e..d0f9ae28ae946 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -72,10 +72,10 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT_2:%.*]], label [[SCALAR_PH3]] ; CHECK: scalar.ph3: -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK2]] ], [ 0, 
[[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK2]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[EXIT_1]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL5]], [[SCALAR_PH3]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL8]], [[SCALAR_PH3]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] ; CHECK-NEXT: [[IV_2_NEXT]] = add i8 [[IV_2]], 1 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i8 [[IV_2_NEXT]] ; CHECK-NEXT: store i8 0, ptr [[GEP_A]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 37a998cb7e938..e53de22f74b2c 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -49,7 +49,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[LOOP]] ] @@ -117,8 +117,8 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[ENTRY:%.*]] ], [ 30, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ], [ [[START]], 
[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -2, [[MIDDLE_BLOCK]] ], [ 30, [[VECTOR_SCEVCHECK]] ], [ 30, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_SCEVCHECK]] ], [ [[START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] @@ -213,8 +213,8 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 576a971c5eaa8..60f85e7d5936a 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -549,9 +549,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF4-IC1-NEXT: br i1 [[CMP_N]], label 
[[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF4-IC1: scalar.ph: -; CHECK-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ], [ true, [[VECTOR_MEMCHECK]] ] -; CHECK-VF4-IC1-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ], [ false, [[VECTOR_MEMCHECK]] ] +; CHECK-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ] +; CHECK-VF4-IC1-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ] ; CHECK-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF4-IC1: for.body: ; CHECK-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ] @@ -711,9 +711,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF4-IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF4-IC2: scalar.ph: -; CHECK-VF4-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-VF4-IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ], [ true, [[VECTOR_MEMCHECK]] ] -; CHECK-VF4-IC2-NEXT: [[BC_MERGE_RDX22:%.*]] = phi i1 [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ], [ false, [[VECTOR_MEMCHECK]] ] +; CHECK-VF4-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4-IC2-NEXT: 
[[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ] +; CHECK-VF4-IC2-NEXT: [[BC_MERGE_RDX22:%.*]] = phi i1 [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ] ; CHECK-VF4-IC2-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF4-IC2: for.body: ; CHECK-VF4-IC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ] @@ -808,9 +808,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF1-IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF1-IC2: scalar.ph: -; CHECK-VF1-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-VF1-IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ], [ true, [[VECTOR_MEMCHECK]] ] -; CHECK-VF1-IC2-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i1 [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ], [ false, [[VECTOR_MEMCHECK]] ] +; CHECK-VF1-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1-IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ] +; CHECK-VF1-IC2-NEXT: [[BC_MERGE_RDX9:%.*]] = phi i1 [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ] ; CHECK-VF1-IC2-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF1-IC2: for.body: ; CHECK-VF1-IC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 05f46e8e27706..7590bb9d68680 100644 --- 
a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -211,8 +211,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index d9827a8c71ee6..75472d29d6ed0 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -59,7 +59,7 @@ define i16 @test(ptr %arg, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[LOOP_3_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_3:%.*]] ; CHECK: inner.latch: ; CHECK-NEXT: [[C_4:%.*]] = call i1 @cond() diff --git 
a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index bb938045a5413..930d3cd41d31d 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -40,8 +40,8 @@ define void @test_versioned_with_sext_use(i32 %offset, ptr %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[OUTER_HEADER_LOOPEXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[INNER_LOOP]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT]], [[INNER_LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -115,8 +115,8 @@ define void @test_versioned_with_zext_use(i32 %offset, ptr %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[OUTER_HEADER_LOOPEXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ] +; CHECK-NEXT: 
[[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[INNER_LOOP]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT]], [[INNER_LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -188,7 +188,7 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -274,8 +274,8 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1, ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[OUTER_HEADER_LOOPEXIT]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_1]], [[VECTOR_SCEVCHECK]] ], [ [[IV_1]], [[INNER_LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[INNER_LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[INNER_LOOP]] ; CHECK: inner.loop: ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT]], [[INNER_LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -371,8 +371,8 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr ; CHECK: middle.block: 
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] @@ -448,7 +448,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -513,7 +513,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll index 579961f546c0b..2f61c89241fd4 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll @@ -76,7 +76,7 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER13]] ; CHECK: for.body.preheader13: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index e22e4a4b31ebe..7fccfeea39bb9 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -85,7 +85,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 -; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: for.cond1.preheader.us.preheader: ; 
CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV6]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 360 @@ -95,7 +95,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[CONV6]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i32 [[I]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] @@ -145,7 +145,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[CONV6]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US_PREHEADER]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[VECTOR_MEMCHECK_1:%.*]], label [[FOR_BODY4_US_PREHEADER]] ; CHECK: for.body4.us.preheader: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] @@ -163,14 +163,14 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP29]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label 
[[FOR_BODY4_US]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[VECTOR_MEMCHECK_1]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: ; CHECK-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[CONV6]], 15 ; CHECK-NEXT: [[TMP31:%.*]] = icmp samesign ult i32 [[I]], 210 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP31]]) ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP30]] ; CHECK-NEXT: [[MIN_ITERS_CHECK_1:%.*]] = icmp samesign ult i32 [[I]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[VECTOR_MEMCHECK_1:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ; CHECK: vector.memcheck.1: ; CHECK-NEXT: [[BOUND0_1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]] ; CHECK-NEXT: [[BOUND1_1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] @@ -221,7 +221,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br i1 [[TMP56]], label [[MIDDLE_BLOCK_1:%.*]], label [[VECTOR_BODY_1]], !llvm.loop [[LOOP7]] ; CHECK: middle.block.1: ; CHECK-NEXT: [[CMP_N_1:%.*]] = icmp eq i64 [[N_VEC_1]], [[CONV6]] -; CHECK-NEXT: br i1 [[CMP_N_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_PREHEADER_1]] +; CHECK-NEXT: br i1 [[CMP_N_1]], label [[VECTOR_MEMCHECK_2:%.*]], label [[FOR_BODY4_US_PREHEADER_1]] ; CHECK: for.body4.us.preheader.1: ; CHECK-NEXT: [[INDVARS_IV_PH_1:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_1]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] @@ -240,14 +240,14 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: store double [[SUB_US_1]], ptr [[TMP60]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 
[[INDVARS_IV_1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[VECTOR_MEMCHECK_2]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: ; CHECK-NEXT: [[TMP61:%.*]] = add nuw nsw i64 [[CONV6]], 30 ; CHECK-NEXT: [[TMP62:%.*]] = icmp samesign ult i32 [[I]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP62]]) ; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP61]] ; CHECK-NEXT: [[MIN_ITERS_CHECK_2:%.*]] = icmp samesign ult i32 [[I]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[VECTOR_MEMCHECK_2:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]] ; CHECK: vector.memcheck.2: ; CHECK-NEXT: [[BOUND0_2:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]] ; CHECK-NEXT: [[BOUND1_2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] @@ -298,7 +298,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br i1 [[TMP87]], label [[MIDDLE_BLOCK_2:%.*]], label [[VECTOR_BODY_2]], !llvm.loop [[LOOP7]] ; CHECK: middle.block.2: ; CHECK-NEXT: [[CMP_N_2:%.*]] = icmp eq i64 [[N_VEC_2]], [[CONV6]] -; CHECK-NEXT: br i1 [[CMP_N_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_PREHEADER_2]] +; CHECK-NEXT: br i1 [[CMP_N_2]], label [[VECTOR_MEMCHECK_3:%.*]], label [[FOR_BODY4_US_PREHEADER_2]] ; CHECK: for.body4.us.preheader.2: ; CHECK-NEXT: [[INDVARS_IV_PH_2:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_2]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[N_VEC_2]], [[MIDDLE_BLOCK_2]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]] @@ -317,14 +317,14 @@ define 
void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: store double [[SUB_US_2]], ptr [[TMP91]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT_2]] = add nuw nsw i64 [[INDVARS_IV_2]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]], label [[FOR_BODY4_US_2]], !llvm.loop [[LOOP10]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_2]], label [[VECTOR_MEMCHECK_3]], label [[FOR_BODY4_US_2]], !llvm.loop [[LOOP10]] ; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: ; CHECK-NEXT: [[TMP92:%.*]] = add nuw nsw i64 [[CONV6]], 45 ; CHECK-NEXT: [[TMP93:%.*]] = icmp samesign ult i32 [[I]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP93]]) ; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP92]] ; CHECK-NEXT: [[MIN_ITERS_CHECK_3:%.*]] = icmp samesign ult i32 [[I]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[VECTOR_MEMCHECK_3:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]] ; CHECK: vector.memcheck.3: ; CHECK-NEXT: [[BOUND0_3:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]] ; CHECK-NEXT: [[BOUND1_3:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll index 2fe420183c683..9196e3c96462a 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll @@ -109,7 +109,7 @@ define dso_local void @test(ptr %start, ptr %end) #0 { ; AVX2-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]] ; AVX2-NEXT: br i1 [[CMP_N17]], label [[EXIT]], label [[BB12_PREHEADER1]] ; AVX2: bb12.preheader: 
-; AVX2-NEXT: [[PTR2_PH:%.*]] = phi ptr [ [[START]], [[BB12_PREHEADER]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; AVX2-NEXT: [[PTR2_PH:%.*]] = phi ptr [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[BB12_PREHEADER]] ], [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; AVX2-NEXT: br label [[BB13:%.*]] ; AVX2: bb12: ; AVX2-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LATCH:%.*]] ], [ [[PTR2_PH]], [[BB12_PREHEADER1]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 7817c23e6a3ec..703a53949a063 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -90,7 +90,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[FOR_BODY_PREHEADER9]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7 ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 From 984cb791db347689c4df222e85069ac58929d5ed Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 17 Dec 2024 12:50:09 -0800 Subject: [PATCH 21/35] [RISCV] Use vmv.v.x to materialize masks in deinterleave2 lowering (#118500) This is a follow up to 2af2634 to use vmv.v.x of i8 constants instead of the prior vid/vand/vmsne sequence. 
The advantage of the vmv.v.x sequence is that it's always m1 (so cheaper at high LMUL), and can be rematerialized by the register allocator if needed to locally reduce register pressure. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 ++-- .../RISCV/rvv/vector-deinterleave-load.ll | 58 +++---- .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 141 ++++++++---------- 3 files changed, 102 insertions(+), 129 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d9a923d3e0ee1..b703eb90e8ef3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10901,23 +10901,23 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op, return DAG.getMergeValues({Even, Odd}, DL); } - // For the indices, use the same SEW to avoid an extra vsetvli - // TODO: If container type is larger than m1, we can consider using a splat - // of a constant instead of the following sequence - - // Create a vector of even indices {0, 1, 2, ...} - MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger(); - SDValue StepVec = DAG.getStepVector(DL, IdxVT); - // 0, 1, 0, 1, 0, 1 - SDValue ZeroOnes = - DAG.getNode(ISD::AND, DL, IdxVT, StepVec, DAG.getConstant(1, DL, IdxVT)); + // For the indices, use the vmv.v.x of an i8 constant to fill the largest + // possibly mask vector, then extract the required subvector. Doing this + // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask + // creation to be rematerialized during register allocation to reduce + // register pressure if needed. + MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1); - SDValue EvenMask = - DAG.getSetCC(DL, MaskVT, ZeroOnes, DAG.getConstant(0, DL, IdxVT), - ISD::CondCode::SETEQ); - // Have the latter be the not of the former to minimize the live range of - // the index vector since that might be large. 
- SDValue OddMask = DAG.getLogicalNOT(DL, EvenMask, MaskVT); + + SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8); + EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat); + SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat, + DAG.getVectorIdxConstant(0, DL)); + + SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8); + OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat); + SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat, + DAG.getVectorIdxConstant(0, DL)); // vcompress the even and odd elements into two separate vectors SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat, diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 4338d1f61af72..34f0f9d9598c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -106,56 +106,44 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: li a1, 85 +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vmseq.vi v24, v8, 0 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmnot.m v6, v24 -; CHECK-NEXT: vcompress.vm v8, v16, v24 -; CHECK-NEXT: vmv1r.v v13, v24 -; CHECK-NEXT: vcompress.vm v24, v16, v6 -; CHECK-NEXT: vmv1r.v v12, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vcompress.vm v0, v16, v13 +; CHECK-NEXT: li a1, 170 +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vmv.v.x v17, a1 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v8, v24, v16 +; CHECK-NEXT: vmv1r.v v12, v16 +; CHECK-NEXT: vmv1r.v v13, v17 +; CHECK-NEXT: vcompress.vm v16, v24, v13 +; CHECK-NEXT: vcompress.vm v24, v0, v12 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vcompress.vm v0, v16, v12 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v24, v0, v13 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v12, v16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v12, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v16 -; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v20, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, 
a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 2291475cef014..bb71c2973bb57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -71,15 +71,16 @@ ret {, } %retval define {, } @vector_deinterleave_nxv2i64_nxv4i64( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.v.x v17, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmseq.vi v16, v12, 0 ; CHECK-NEXT: vcompress.vm v12, v8, v16 -; CHECK-NEXT: vmnot.m v14, v16 -; CHECK-NEXT: vcompress.vm v16, v8, v14 +; CHECK-NEXT: vcompress.vm v20, v8, v17 ; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: vmv2r.v v10, v16 +; CHECK-NEXT: vmv2r.v v10, v20 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv4i64( %vec) ret {, } %retval @@ -88,15 +89,16 @@ ret {, } %retval define {, } @vector_deinterleave_nxv4i64_nxv8i64( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vand.vi v16, v16, 1 -; CHECK-NEXT: vmseq.vi v24, v16, 0 ; CHECK-NEXT: vcompress.vm v16, v8, v24 -; CHECK-NEXT: vmnot.m v20, v24 -; CHECK-NEXT: vcompress.vm v24, v8, v20 +; CHECK-NEXT: vcompress.vm v0, v8, v25 ; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vmv4r.v v12, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv8i64( %vec) ret {, } 
%retval @@ -182,50 +184,41 @@ define {, } @vector_deinterleave_nxv8i64_nxv ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v7, a0 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.v.x v6, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vand.vi v24, v16, 1 -; CHECK-NEXT: vmseq.vi v16, v24, 0 -; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: vcompress.vm v24, v8, v7 +; CHECK-NEXT: vmv1r.v v28, v7 +; CHECK-NEXT: vmv1r.v v29, v6 +; CHECK-NEXT: vcompress.vm v0, v8, v29 +; CHECK-NEXT: vcompress.vm v8, v16, v28 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmnot.m v17, v16 -; CHECK-NEXT: vcompress.vm v0, v8, v17 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v8, v16, v29 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vcompress.vm v24, v8, v17 +; CHECK-NEXT: vs8r.v 
v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v28, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v8 -; CHECK-NEXT: vmv4r.v v4, v24 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv4r.v v4, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -350,15 +343,16 @@ ret {, } %retval define {, } @vector_deinterleave_nxv2f64_nxv4f64( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.v.x v17, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vand.vi v12, v12, 1 -; CHECK-NEXT: vmseq.vi v16, v12, 0 ; CHECK-NEXT: vcompress.vm v12, v8, v16 -; CHECK-NEXT: vmnot.m v14, v16 -; CHECK-NEXT: vcompress.vm v16, v8, v14 +; CHECK-NEXT: vcompress.vm v20, v8, v17 ; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: vmv2r.v v10, v16 +; CHECK-NEXT: vmv2r.v v10, v20 ; CHECK-NEXT: ret %retval = call {, } @llvm.vector.deinterleave2.nxv4f64( %vec) ret {, } %retval @@ -423,50 +417,41 @@ define {, } @vector_deinterleave_nxv8f ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp 
+ 16 + 24 * vlenb -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: li a0, 85 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v7, a0 +; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vmv.v.x v6, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vand.vi v24, v16, 1 -; CHECK-NEXT: vmseq.vi v16, v24, 0 -; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: vcompress.vm v24, v8, v7 +; CHECK-NEXT: vmv1r.v v28, v7 +; CHECK-NEXT: vmv1r.v v29, v6 +; CHECK-NEXT: vcompress.vm v0, v8, v29 +; CHECK-NEXT: vcompress.vm v8, v16, v28 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmnot.m v17, v16 -; CHECK-NEXT: vcompress.vm v0, v8, v17 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vcompress.vm v24, v8, v16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vcompress.vm v8, v16, v29 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vcompress.vm v24, v8, v17 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v28, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: 
vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v8 -; CHECK-NEXT: vmv4r.v v4, v24 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vmv4r.v v4, v8 +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 From d7d0e740cc170d471e080fd5bb09633870272073 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 17 Dec 2024 12:50:49 -0800 Subject: [PATCH 22/35] [MemProf] Refactor single alloc type handling and use in more cases (#120290) Emit message when we have aliased contexts that are conservatively hinted not cold. This is not a change in behavior, just in message when the -memprof-report-hinted-sizes flag is enabled. --- .../include/llvm/Analysis/MemoryProfileInfo.h | 6 ++++ llvm/lib/Analysis/MemoryProfileInfo.cpp | 35 +++++++++++-------- .../PGOProfile/memprof_loop_unroll.ll | 11 ++++-- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h index b46124a4ed0d5..215139caef696 100644 --- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h +++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -117,6 +117,12 @@ class CallStackTrie { /// which is lower overhead and more direct than maintaining this metadata. /// Returns true if memprof metadata attached, false if not (attribute added). bool buildAndAttachMIBMetadata(CallBase *CI); + + /// Add an attribute for the given allocation type to the call instruction. + /// If hinted by reporting is enabled, a message is emitted with the given + /// descriptor used to identify the category of single allocation type. 
+ void addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, + StringRef Descriptor); }; /// Helper class to iterate through stack ids in both metadata (memprof MIB and diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 217f304e30de1..1c3f589e84941 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -278,26 +278,30 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, return true; } +void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT, + StringRef Descriptor) { + addAllocTypeAttribute(CI->getContext(), CI, AT); + if (MemProfReportHintedSizes) { + std::vector ContextSizeInfo; + collectContextSizeInfo(Alloc, ContextSizeInfo); + for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { + errs() << "MemProf hinting: Total size for full allocation context hash " + << FullStackId << " and " << Descriptor << " alloc type " + << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n"; + } + } +} + // Build and attach the minimal necessary MIB metadata. If the alloc has a // single allocation type, add a function attribute instead. Returns true if // memprof metadata attached, false if not (attribute added). 
bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { - auto &Ctx = CI->getContext(); if (hasSingleAllocType(Alloc->AllocTypes)) { - addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes); - if (MemProfReportHintedSizes) { - std::vector ContextSizeInfo; - collectContextSizeInfo(Alloc, ContextSizeInfo); - for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) { - errs() - << "MemProf hinting: Total size for full allocation context hash " - << FullStackId << " and single alloc type " - << getAllocTypeAttributeString((AllocationType)Alloc->AllocTypes) - << ": " << TotalSize << "\n"; - } - } + addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes, + "single"); return false; } + auto &Ctx = CI->getContext(); std::vector MIBCallStack; MIBCallStack.push_back(AllocStackId); std::vector MIBNodes; @@ -314,8 +318,9 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { // If there exists corner case that CallStackTrie has one chain to leaf // and all node in the chain have multi alloc type, conservatively give // it non-cold allocation type. - // FIXME: Avoid this case before memory profile created. - addAllocTypeAttribute(Ctx, CI, AllocationType::NotCold); + // FIXME: Avoid this case before memory profile created. Alternatively, select + // hint based on fraction cold. + addSingleAllocTypeAttribute(CI, AllocationType::NotCold, "indistinguishable"); return false; } diff --git a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll index 746db4c7153d1..9bc1282ab4529 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll +++ b/llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll @@ -1,4 +1,5 @@ -;; Tests memprof when contains loop unroll. +;; Tests memprof when contains loop unroll of allocation, where the unrolled +;; allocations have the same context but different allocation types. 
;; Avoid failures on big-endian systems that can't read the profile properly ; REQUIRES: x86_64-linux @@ -9,8 +10,12 @@ ;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm ; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata -; RUN: opt < %s -passes='memprof-use' -S | FileCheck %s +; RUN: opt < %s -passes='memprof-use' -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s +;; Conservatively annotate as not cold. We get two messages as there are two +;; unrolled copies of the allocation. +; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4 +; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4 ; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]] ; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" } ; CHECK-NOT: stackIds: () @@ -93,4 +98,4 @@ attributes #2 = { builtin allocsize(0) } !27 = distinct !{!27, !28, !23, !29} !28 = !DILocation(line: 5, column: 5, scope: !10) !29 = !{!"llvm.loop.mustprogress"} -!30 = !DILocation(line: 8, column: 1, scope: !10) \ No newline at end of file +!30 = !DILocation(line: 8, column: 1, scope: !10) From bbeafe4b94bb3db89b579d8a6fd914cc96b0af06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 17 Dec 2024 12:56:39 -0800 Subject: [PATCH 23/35] [flang][cuda] Apply implicit data attribute to local arrays (#120293) Add the implicit data attribute to local arrays that don't have one. This simplifies the host array detection in semantics.
--- flang/lib/Semantics/resolve-names.cpp | 10 +++++----- flang/test/Semantics/cuf09.cuf | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 3a1ccec1fdf4b..51e8b15e6adf0 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -8973,12 +8973,12 @@ void ResolveNamesVisitor::FinishSpecificationPart( if (NeedsExplicitType(symbol)) { ApplyImplicitRules(symbol); } - if (inDeviceSubprogram && IsDummy(symbol) && - symbol.has()) { - auto *dummy{symbol.detailsIf()}; - if (!dummy->cudaDataAttr() && !IsValue(symbol)) { + if (inDeviceSubprogram && symbol.has()) { + auto *object{symbol.detailsIf()}; + if (!object->cudaDataAttr() && !IsValue(symbol) && + (IsDummy(symbol) || object->IsArray())) { // Implicitly set device attribute if none is set in device context. - dummy->set_cudaDataAttr(common::CUDADataAttr::Device); + object->set_cudaDataAttr(common::CUDADataAttr::Device); } } if (IsDummy(symbol) && isImplicitNoneType() && diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf index c551ecbff2cc0..e0ca814aec26a 100644 --- a/flang/test/Semantics/cuf09.cuf +++ b/flang/test/Semantics/cuf09.cuf @@ -22,6 +22,12 @@ module m !ERROR: Host array 'm' cannot be present in device context if (i .le. N) a(i) = m(i) end subroutine + + attributes(global) subroutine localarray() + integer :: a(10) + i = threadIdx%x + a(i) = i + end subroutine end program main From 48c20e7106bb15398b55a13d0a2ec74cb51d055e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 17 Dec 2024 16:10:02 -0500 Subject: [PATCH 24/35] [RISCV][VLOPT] Do not optimize VL when isVectorOpUsedAsScalarOp (#120291) This does not have tests, so we will remove this for now and add it back later with tests. 
--- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index bd48d730a5704..a9e5bb6ecd9b8 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -887,13 +887,10 @@ bool RISCVVLOptimizer::checkUsers(const MachineOperand *&CommonVL, // Instructions like reductions may use a vector register as a scalar // register. In this case, we should treat it like a scalar register which // does not impact the decision on whether to optimize VL. + // TODO: Treat it like a scalar register instead of bailing out. if (isVectorOpUsedAsScalarOp(UserOp)) { - [[maybe_unused]] Register R = UserOp.getReg(); - [[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R); - assert(RISCV::VRRegClass.hasSubClassEq(RC) && - "Expect LMUL 1 register class for vector as scalar operands!"); - LLVM_DEBUG(dbgs() << " Use this operand as a scalar operand\n"); - continue; + CanReduceVL = false; + break; } if (mayReadPastVL(UserMI)) { From 7f2fb8061e97ff21caa04b9c44cac2f599202232 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 17 Dec 2024 13:12:29 -0800 Subject: [PATCH 25/35] [memprof] Don't use Frame::hash or hashCallStacks in unit test (#119984) This patch checks the result of YAML parsing at the level of MemProfRecord instead of IndexedMemProfRecord, thereby avoiding use of Frame::hash and hashCallStacks. This makes sense because we ultimately care about consumers like MemProfiler.cpp obtaining MemProfRecord correctly; IndexedMemProfData and hash values are just intermediaries. Once this patch lands, we call Frame::hash and hashCallStacks only when adding Frames or call stacks to their respective data structures. In other words, the hash functions are pretty much business internal to IndexedMemProfRecord. 
--- llvm/unittests/ProfileData/MemProfTest.cpp | 77 +++++++++------------- 1 file changed, 30 insertions(+), 47 deletions(-) diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 2cb4725ab89e3..2eb85d5b2f587 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -709,47 +709,33 @@ TEST(MemProf, YAMLParser) { YAMLReader.parse(YAMLData); IndexedMemProfData MemProfData = YAMLReader.takeMemProfData(); - Frame F1(0x100, 11, 10, true); - Frame F2(0x200, 22, 20, false); - Frame F3(0x300, 33, 30, false); - Frame F4(0x400, 44, 40, true); - Frame F5(0x500, 55, 50, true); - Frame F6(0x600, 66, 60, false); - Frame F7(0x700, 77, 70, true); - Frame F8(0x800, 88, 80, false); - - llvm::SmallVector CS1 = {F1.hash(), F2.hash()}; - llvm::SmallVector CS2 = {F3.hash(), F4.hash()}; - llvm::SmallVector CS3 = {F5.hash(), F6.hash()}; - llvm::SmallVector CS4 = {F7.hash(), F8.hash()}; - - // Verify the entire contents of MemProfData.Frames. - EXPECT_THAT(MemProfData.Frames, - UnorderedElementsAre(Pair(F1.hash(), F1), Pair(F2.hash(), F2), - Pair(F3.hash(), F3), Pair(F4.hash(), F4), - Pair(F5.hash(), F5), Pair(F6.hash(), F6), - Pair(F7.hash(), F7), Pair(F8.hash(), F8))); - - // Verify the entire contents of MemProfData.Frames. - EXPECT_THAT(MemProfData.CallStacks, - UnorderedElementsAre(Pair(hashCallStack(CS1), CS1), - Pair(hashCallStack(CS2), CS2), - Pair(hashCallStack(CS3), CS3), - Pair(hashCallStack(CS4), CS4))); - // Verify the entire contents of MemProfData.Records. 
ASSERT_THAT(MemProfData.Records, SizeIs(1)); - const auto &[GUID, Record] = MemProfData.Records.front(); + const auto &[GUID, IndexedRecord] = MemProfData.Records.front(); EXPECT_EQ(GUID, 0xdeadbeef12345678ULL); + + FrameIdConverter FrameIdConv( + MemProfData.Frames); + CallStackIdConverter CSIdConv( + MemProfData.CallStacks, FrameIdConv); + MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv); + ASSERT_THAT(Record.AllocSites, SizeIs(2)); - EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1)); + EXPECT_THAT( + Record.AllocSites[0].CallStack, + ElementsAre(Frame(0x100, 11, 10, true), Frame(0x200, 22, 20, false))); EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U); EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U); - EXPECT_EQ(Record.AllocSites[1].CSId, hashCallStack(CS2)); + EXPECT_THAT( + Record.AllocSites[1].CallStack, + ElementsAre(Frame(0x300, 33, 30, false), Frame(0x400, 44, 40, true))); EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U); EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U); - EXPECT_THAT(Record.CallSiteIds, - ElementsAre(hashCallStack(CS3), hashCallStack(CS4))); + EXPECT_THAT(Record.CallSites, + ElementsAre(ElementsAre(Frame(0x500, 55, 50, true), + Frame(0x600, 66, 60, false)), + ElementsAre(Frame(0x700, 77, 70, true), + Frame(0x800, 88, 80, false)))); } // Verify that the YAML parser accepts a GUID expressed as a function name. @@ -769,24 +755,21 @@ TEST(MemProf, YAMLParserGUID) { YAMLReader.parse(YAMLData); IndexedMemProfData MemProfData = YAMLReader.takeMemProfData(); - Frame F1(0x100, 11, 10, true); - - llvm::SmallVector CS1 = {F1.hash()}; - - // Verify the entire contents of MemProfData.Frames. - EXPECT_THAT(MemProfData.Frames, UnorderedElementsAre(Pair(F1.hash(), F1))); - - // Verify the entire contents of MemProfData.Frames. - EXPECT_THAT(MemProfData.CallStacks, - UnorderedElementsAre(Pair(hashCallStack(CS1), CS1))); - // Verify the entire contents of MemProfData.Records. 
ASSERT_THAT(MemProfData.Records, SizeIs(1)); - const auto &[GUID, Record] = MemProfData.Records.front(); + const auto &[GUID, IndexedRecord] = MemProfData.Records.front(); EXPECT_EQ(GUID, IndexedMemProfRecord::getGUID("_Z3fooi")); + + FrameIdConverter FrameIdConv( + MemProfData.Frames); + CallStackIdConverter CSIdConv( + MemProfData.CallStacks, FrameIdConv); + MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv); + ASSERT_THAT(Record.AllocSites, SizeIs(1)); - EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1)); - EXPECT_THAT(Record.CallSiteIds, IsEmpty()); + EXPECT_THAT(Record.AllocSites[0].CallStack, + ElementsAre(Frame(0x100, 11, 10, true))); + EXPECT_THAT(Record.CallSites, IsEmpty()); } template std::string serializeInYAML(T &Val) { From b0fbddde381616d1f0e82899b0fc8c3cf7072c4d Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 17 Dec 2024 13:57:16 -0600 Subject: [PATCH 26/35] [OpenMP] Only put `retain` for NVPTX so it can be optimized out for AMD Summary: This is a hack that only NVPTX needs. --- offload/DeviceRTL/src/Misc.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp index c1df477365bcb..00935cce05e47 100644 --- a/offload/DeviceRTL/src/Misc.cpp +++ b/offload/DeviceRTL/src/Misc.cpp @@ -115,8 +115,12 @@ void *indirectCallLookup(void *HstPtr) { /// The openmp client instance used to communicate with the server. 
/// FIXME: This is marked as 'retain' so that it is not removed via /// `-mlink-builtin-bitcode` +#ifdef __NVPTX__ [[gnu::visibility("protected"), gnu::weak, gnu::retain]] rpc::Client Client asm("__llvm_rpc_client"); +#else +[[gnu::visibility("protected"), gnu::weak]] rpc::Client Client asm("__llvm_rpc_client"); +#endif } // namespace impl } // namespace ompx From 169c32eb49fa9b559d388b9b8f4374ff9e1be9be Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 17 Dec 2024 16:19:35 -0500 Subject: [PATCH 27/35] [RISCV][VLOPT] Enable the RISCVVLOptimizer by default (#119461) Now that we have testing of all instructions in the isSupportedInstr switch, and better coverage of getOperandInfo, I think it is a good time to enable this by default. --- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll | 2 - .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 20 +-- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 3 +- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 4 +- .../fixed-vectors-shuffle-changes-length.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 63 +++----- llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll | 2 - llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 148 +++++++++--------- llvm/test/CodeGen/RISCV/rvv/vl-opt-op-info.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 2 - llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 2 - llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 7 +- llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll | 2 - llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 3 +- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 3 - 
llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll | 2 - llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 6 +- llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll | 90 ++++------- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 7 +- 35 files changed, 178 insertions(+), 267 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 4bcb0edc4b093..0b8407943a907 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -105,7 +105,7 @@ static cl::opt EnablePostMISchedLoadStoreClustering( static cl::opt EnableVLOptimizer("riscv-enable-vl-optimizer", cl::desc("Enable the RISC-V VL Optimizer pass"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); static cl::opt DisableVectorMaskMutation( "riscv-disable-vector-mask-mutation", diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 8fd9ae9850366..b0c756e26985b 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -119,6 +119,8 @@ ; RV64-NEXT: RISC-V Optimize W Instructions ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V Merge Base Offset +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: RISC-V VL Optimizer ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass ; CHECK-NEXT: RISC-V Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup @@ -129,7 +131,6 @@ ; CHECK-NEXT: Live Variable Analysis ; CHECK-NEXT: Eliminate PHI nodes for register allocation ; CHECK-NEXT: Two-Address instruction pass -; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval 
Analysis ; CHECK-NEXT: Register Coalescer diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index ce4bc48dff042..6f515996677ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2654,9 +2654,8 @@ define @vp_ctlo_zero_undef_nxv1i9( %va, @vp_ctlo_zero_undef_nxv1i9( %va, @buildvec_not_vid_v4i8_2() { define <16 x i8> @buildvec_not_vid_v16i8() { ; CHECK-LABEL: buildvec_not_vid_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 7, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 10156141119a7..0bd8466669dc8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -348,8 +348,9 @@ define <8 x i8> @splat_ve4_ins_i0ve2(<8 x i8> %v) { define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { ; CHECK-LABEL: splat_ve4_ins_i1ve3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v10, v9, 1 @@ -432,8 +433,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: li a0, 70 ; 
CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index cba8de82ec41b..59c7feb53ce94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1100,15 +1100,17 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 1 ; CHECK-NEXT: li a1, 33 ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v11, 3 ; CHECK-NEXT: vle16.v v12, (a1) ; CHECK-NEXT: vmerge.vim v11, v11, 2, v0 -; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vmv1r.v v13, v9 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index 66f95b7077672..abbbfe8f252fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -97,8 +97,9 @@ define <4 x i32> @v4i32_v8i32(<8 x i32>) { define <4 x i32> @v4i32_v16i32(<16 x i32>) { ; RV32-LABEL: v4i32_v16i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, 1 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v14, 6 ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vmv.v.i v0, 10 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index 2392cb77c753d..61cc754e21df8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -184,9 +184,8 @@ define @icmp_uge_vv_nxv1i8( %va, @icmp_uge_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -348,9 +347,8 @@ define @icmp_sge_vv_nxv1i8( %va, @icmp_sge_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -470,9 +468,8 @@ define @icmp_sle_vx_nxv1i8( %va, i8 %b, @icmp_sle_vx_swap_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -543,10 +540,9 @@ define @icmp_eq_vv_nxv8i7( %va, @llvm.vp.icmp.nxv8i7( %va, %vb, metadata !"eq", %m, i32 %evl) @@ -557,11 +553,10 @@ define @icmp_eq_vx_nxv8i7( %va, i7 %b, poison, i7 %b, i32 0 @@ -574,11 +569,10 @@ define @icmp_eq_vx_swap_nxv8i7( %va, i7 %b, < ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = 
insertelement poison, i7 %b, i32 0 @@ -764,9 +758,8 @@ define @icmp_uge_vv_nxv8i8( %va, @icmp_uge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -928,9 +921,8 @@ define @icmp_sge_vv_nxv8i8( %va, @icmp_sge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1050,9 +1042,8 @@ define @icmp_sle_vx_nxv8i8( %va, i8 %b, @icmp_sle_vx_swap_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1377,9 +1368,8 @@ define @icmp_uge_vv_nxv1i32( %va, @icmp_uge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1541,9 +1531,8 @@ define @icmp_sge_vv_nxv1i32( %va, @icmp_sge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: 
vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1663,9 +1652,8 @@ define @icmp_sle_vx_nxv1i32( %va, i32 %b, @icmp_sle_vx_swap_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1887,9 +1875,8 @@ define @icmp_uge_vv_nxv8i32( %va, @icmp_uge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2066,9 +2053,8 @@ define @icmp_sge_vv_nxv8i32( %va, @icmp_sge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2199,9 +2185,8 @@ define @icmp_sle_vx_nxv8i32( %va, i32 %b, @icmp_sle_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2644,9 +2629,8 @@ define @icmp_uge_vx_nxv1i64( %va, i64 %b, poison, 
i64 %b, i32 0 @@ -2898,9 +2882,8 @@ define @icmp_sge_vx_nxv1i64( %va, i64 %b, poison, i64 %b, i32 0 @@ -3095,9 +3078,8 @@ define @icmp_sle_vx_swap_nxv1i64( %va, i64 % ; ; RV64-LABEL: icmp_sle_vx_swap_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vmsle.vv v0, v9, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -3431,9 +3413,8 @@ define @icmp_uge_vx_nxv8i64( %va, i64 %b, @icmp_sge_vx_nxv8i64( %va, i64 %b, @icmp_sle_vx_swap_nxv8i64( %va, i64 % ; ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index c7b5200979370..2814be2792de9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -11,9 +11,7 @@ define @vdiv_vx_nxv8i7( %a, i7 signext %b, @vdivu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vdivu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index 7ca1983e8b32c..ab67e9833c78a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -4301,10 +4301,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmadd_vf_nxv1f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: 
vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -4334,10 +4333,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat_commute( @vfnmadd_vf_nxv1f16_neg_splat_unmasked( @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv1f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmsub_vf_nxv1f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t @@ -4701,9 +4698,10 @@ define @vfnmsub_vf_nxv1f16_neg_splat_commute( @vfnmsub_vf_nxv1f16_neg_splat_unmasked( @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmadd_vf_nxv2f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma @@ -5253,10 +5252,9 @@ define @vfnmadd_vf_nxv2f16_neg_splat_commute( @vfnmadd_vf_nxv2f16_neg_splat_unmasked( @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv2f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmsub_vf_nxv2f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, 
ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t @@ -5620,9 +5617,10 @@ define @vfnmsub_vf_nxv2f16_neg_splat_commute( @vfnmsub_vf_nxv2f16_neg_splat_unmasked( @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmadd_vf_nxv4f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v9, v9, a1, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma @@ -6172,10 +6171,9 @@ define @vfnmadd_vf_nxv4f16_neg_splat_commute( @vfnmadd_vf_nxv4f16_neg_splat_unmasked( @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv4f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmsub_vf_nxv4f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v10, a1 ; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v9, v10, a1, v0.t @@ -6539,9 +6536,10 @@ define @vfnmsub_vf_nxv4f16_neg_splat_commute( @vfnmsub_vf_nxv4f16_neg_splat_unmasked( @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmadd_vf_nxv8f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli zero, a0, 
e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v10, v10, a1, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma @@ -7091,10 +7090,9 @@ define @vfnmadd_vf_nxv8f16_neg_splat_commute( @vfnmadd_vf_nxv8f16_neg_splat_unmasked( @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv8f16_neg_splat( %va ; ZVFHMIN-LABEL: vfnmsub_vf_nxv8f16_neg_splat: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v12, a1 ; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v10, v12, a1, v0.t @@ -7458,9 +7455,10 @@ define @vfnmsub_vf_nxv8f16_neg_splat_commute( @vfnmsub_vf_nxv8f16_neg_splat_unmasked( @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv16f16_neg_splat( ; ZVFHMIN-NEXT: addi a1, sp, 16 ; ZVFHMIN-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v4, v16, a1, v0.t ; ZVFHMIN-NEXT: vxor.vx v12, v12, a1, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma @@ -8152,12 +8151,11 @@ define @vfnmadd_vf_nxv16f16_neg_splat_commute( @vfnmadd_vf_nxv16f16_neg_splat_unmasked( @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv16f16_neg_splat( ; ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v4, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v12, v16, a1, v0.t @@ -8592,9 +8589,10 @@ define @vfnmsub_vf_nxv16f16_neg_splat_commute( @vfnmsub_vf_nxv16f16_neg_splat_unmasked( @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute( @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a4, 8 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a2 ; ZVFHMIN-NEXT: mv a3, a0 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a4, v0.t ; ZVFHMIN-NEXT: vxor.vx v16, v16, a4, v0.t ; ZVFHMIN-NEXT: slli a2, a1, 1 @@ -10848,10 +10847,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_unmasked( @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a3, 8 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma -; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: vxor.vx v24, v16, a3, v0.t ; ZVFHMIN-NEXT: slli a2, a1, 1 ; ZVFHMIN-NEXT: mv a3, a0 @@ -12641,9 +12638,8 @@ define @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vmax_vx_nxv8i7( %a, i7 signext %b, @vmaxu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vmaxu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 3441934fb1550..79631cd80594c 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -11,9 +11,7 @@ define @vmin_vx_nxv8i7( %a, i7 signext %b, @vminu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vminu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index 0b23314efde22..b63098b64e292 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -1448,11 +1448,10 @@ define @vmul_vadd_vx_nxv8i64_unmasked( %va, ; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 21 -; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: li a1, 7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmadd.vx v8, a1, v16 +; CHECK-NEXT: vmv.v.x v16, a1 +; CHECK-NEXT: li a0, 7 +; CHECK-NEXT: vmadd.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index ba6d95c5a43b7..3273274a70b41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -11,9 +11,7 @@ define @vrem_vx_nxv8i7( %a, i7 signext %b, @vremu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vremu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index 3421c6af334bc..575d041b091dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -9,10 +9,9 @@ declare @llvm.vp.sadd.sat.nxv8i7(, @vsadd_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vx_nxv8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vsra.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index 180e0799044e8..c9ed72bc63da2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -10,11 +10,10 @@ define @vsaddu_vx_nxv8i7( %a, i7 signext %b, ; CHECK-LABEL: vsaddu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t ; CHECK-NEXT: vminu.vx v8, v8, a2, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 80dd87ce5da85..5b577dc0f8df9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -1125,7 +1125,6 @@ exit: define @clobbered_forwarded_avl(i64 %n, %v, i1 %cmp) { ; CHECK-LABEL: clobbered_forwarded_avl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: .LBB27_1: # %for.body @@ -1133,9 +1132,7 @@ 
define @clobbered_forwarded_avl(i64 %n, %v ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: bnez a1, .LBB27_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vadd.vv v10, v8, v8 -; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v10, v8 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll index 380835494ed17..f5c46aec86b86 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -9,10 +9,9 @@ declare @llvm.vp.shl.nxv8i7(, @vsll_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vsll_vx_nxv8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll index 44d3ee96f5e61..001f744503523 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -7,10 +7,10 @@ define @vsitofp_nxv2bf16_nxv2i7( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_nxv2bf16_nxv2i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vsra.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsext.vf2 v9, v8, v0.t ; CHECK-NEXT: vfwcvt.f.x.v v10, v9, v0.t ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma @@ -140,10 +140,10 @@ define @vsitofp_nxv2f16_nxv2i7( %va, @vsra_vi_mask_nxv8i32( %va, @vsra_vv_nxv1i8_sext_zext_mixed_trunc( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsra_vv_nxv1i8_sext_zext_mixed_trunc: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: vzext.vf4 v10, v8 ; CHECK-NEXT: vsra.vv v8, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v8, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t ; CHECK-NEXT: ret %sexted_va = sext %va to diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll index 208063bfd2342..961689b15b839 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -11,10 +11,8 @@ define @vsra_vx_nxv8i7( %a, i7 signext %b, @vsrl_vx_nxv8i7( %a, i7 signext %b, @llvm.vp.ssub.sat.nxv8i7(, @vssub_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vx_nxv8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vsra.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t @@ -62,9 +61,8 @@ define @vssub_vx_nxv1i8( %va, i8 %b, @vssub_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vx_nxv1i8_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 7674a457ca961..b602f11e2c805 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -10,11 +10,10 @@ define @vssubu_vx_nxv8i7( %a, i7 signext %b, ; 
CHECK-LABEL: vssubu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 -; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 @@ -60,9 +59,8 @@ define @vssubu_vx_nxv1i8( %va, i8 %b, @vssubu_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vx_nxv1i8_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll index f17cdd9c72bfc..06d85193e3b61 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll @@ -8,9 +8,9 @@ define @vuitofp_nxv2bf16_nxv2i7( %va, @vuitofp_nxv2f16_nxv2i7( %va, @llvm.vp.shl.nxv2i64(, @vwsll_vv_nxv2i64_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -35,10 +34,9 @@ define @vwsll_vv_nxv2i64_sext( %a, @vwsll_vv_nxv2i64_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, 
v0.t ; CHECK-NEXT: ret ; @@ -57,17 +55,15 @@ define @vwsll_vv_nxv2i64_zext( %a, @vwsll_vx_i64_nxv2i64( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv2i64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -94,12 +90,11 @@ define @vwsll_vx_i64_nxv2i64( %a, i64 %b, < define @vwsll_vx_i32_nxv2i64_sext( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -120,12 +115,11 @@ define @vwsll_vx_i32_nxv2i64_sext( %a, i32 define @vwsll_vx_i32_nxv2i64_zext( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -146,12 +140,11 @@ define @vwsll_vx_i32_nxv2i64_zext( %a, i32 define @vwsll_vx_i16_nxv2i64_sext( %a, i16 %b, %m, i32 zeroext %vl) { 
; CHECK-LABEL: vwsll_vx_i16_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -172,12 +165,11 @@ define @vwsll_vx_i16_nxv2i64_sext( %a, i16 define @vwsll_vx_i16_nxv2i64_zext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -198,12 +190,11 @@ define @vwsll_vx_i16_nxv2i64_zext( %a, i16 define @vwsll_vx_i8_nxv2i64_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf8 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -224,12 +215,11 @@ define @vwsll_vx_i8_nxv2i64_sext( %a, i8 %b define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv2i64_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf8 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; 
CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -250,9 +240,8 @@ define @vwsll_vx_i8_nxv2i64_zext( %a, i8 %b define @vwsll_vi_nxv2i64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ; @@ -276,10 +265,9 @@ declare @llvm.vp.shl.nxv4i32(, @vwsll_vv_nxv4i32_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -298,10 +286,9 @@ define @vwsll_vv_nxv4i32_sext( %a, @vwsll_vv_nxv4i32_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -320,17 +307,15 @@ define @vwsll_vv_nxv4i32_zext( %a, @vwsll_vx_i64_nxv4i32( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv4i32: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv4i32: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 
v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -358,9 +343,8 @@ define @vwsll_vx_i64_nxv4i32( %a, i64 %b, < define @vwsll_vx_i32_nxv4i32( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -380,12 +364,11 @@ define @vwsll_vx_i32_nxv4i32( %a, i32 %b, < define @vwsll_vx_i16_nxv4i32_sext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -406,12 +389,11 @@ define @vwsll_vx_i16_nxv4i32_sext( %a, i16 define @vwsll_vx_i16_nxv4i32_zext( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -432,12 +414,11 @@ define @vwsll_vx_i16_nxv4i32_zext( %a, i16 define @vwsll_vx_i8_nxv4i32_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: 
vsext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -458,12 +439,11 @@ define @vwsll_vx_i8_nxv4i32_sext( %a, i8 %b define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv4i32_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf4 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -484,9 +464,8 @@ define @vwsll_vx_i8_nxv4i32_zext( %a, i8 %b define @vwsll_vi_nxv4i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ; @@ -511,10 +490,9 @@ declare @llvm.vp.shl.nxv8i16(, @vwsll_vv_nxv8i16_sext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv8i16_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -533,10 +511,9 @@ define @vwsll_vv_nxv8i16_sext( %a, @vwsll_vv_nxv8i16_zext( %a, %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vv_nxv8i16_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -555,17 +532,15 @@ define @vwsll_vv_nxv8i16_zext( %a, 
@vwsll_vx_i64_nxv8i16( %a, i64 %b, %m, i32 zeroext %vl) { ; CHECK-RV32-LABEL: vwsll_vx_i64_nxv8i16: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-RV32-NEXT: vzext.vf2 v10, v8 ; CHECK-RV32-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vwsll_vx_i64_nxv8i16: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-RV64-NEXT: vzext.vf2 v10, v8 ; CHECK-RV64-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-RV64-NEXT: ret ; @@ -593,9 +568,8 @@ define @vwsll_vx_i64_nxv8i16( %a, i64 %b, @vwsll_vx_i32_nxv8i16( %a, i32 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i32_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -616,9 +590,8 @@ define @vwsll_vx_i32_nxv8i16( %a, i32 %b, @vwsll_vx_i16_nxv8i16( %a, i16 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i16_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vx v8, v10, a0, v0.t ; CHECK-NEXT: ret ; @@ -638,12 +611,11 @@ define @vwsll_vx_i16_nxv8i16( %a, i16 %b, @vwsll_vx_i8_nxv8i16_sext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, 
v0.t ; CHECK-NEXT: ret ; @@ -664,12 +636,11 @@ define @vwsll_vx_i8_nxv8i16_sext( %a, i8 %b, define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vx_i8_nxv8i16_zext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vzext.vf2 v12, v9 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret ; @@ -690,9 +661,8 @@ define @vwsll_vx_i8_nxv8i16_zext( %a, i8 %b, define @vwsll_vi_nxv8i16( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vwsll_vi_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vsll.vi v8, v10, 2, v0.t ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 4c9d9e5ffdf77..42c87c9660dc9 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -664,14 +664,15 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vslidedown.vi v10, v10, 2 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32MV-NEXT: vand.vv v8, v10, v8 -; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, ta, ma ; RV32MV-NEXT: vmv.v.i v10, 1 +; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32MV-NEXT: vmv.v.i v11, 0 ; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; RV32MV-NEXT: vslideup.vi v11, v10, 2 -; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32MV-NEXT: vsetivli zero, 5, e8, mf2, ta, ma ; RV32MV-NEXT: vmv.v.i v10, 2 -; RV32MV-NEXT: vsetivli zero, 5, e8, mf2, tu, ma +; RV32MV-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; RV32MV-NEXT: vslideup.vi v11, v10, 4 ; 
RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32MV-NEXT: vsext.vf4 v12, v11 From cde996c31d6fb6ae1bbbc79aa71dff2b7fc0a8ae Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 17 Dec 2024 16:26:23 -0500 Subject: [PATCH 28/35] [lld/COFF] Remove needless indirection `symtab.ctx.symtab` is just `symtab`. Looks like #119296 added this using a global find-and-replace. This was the only instance of `symtab.ctx.symtab` in lld/. No behavior change. --- lld/COFF/SymbolTable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 515239db029f7..6b3375e13e839 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -116,7 +116,7 @@ static void forceLazy(Symbol *s) { } case Symbol::Kind::LazyObjectKind: { InputFile *file = cast(s)->file; - file->symtab.ctx.symtab.addFile(file); + file->symtab.addFile(file); break; } case Symbol::Kind::LazyDLLSymbolKind: { From 1d06157b9ea4bc76eff0ae670e8d3411eaaa6e42 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Dec 2024 13:30:20 -0800 Subject: [PATCH 29/35] [libc] fix -Wgcc-compat (#120303) I don't quite recall why I added those in the first place. These tests build without diagnostics for both clang and GCC with this fix. 
Fixes: #114653 --- libc/test/include/stdbit_stub.h | 56 +++++++++++++-------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/libc/test/include/stdbit_stub.h b/libc/test/include/stdbit_stub.h index 65b1ca3b2c297..8a8e30e889d6d 100644 --- a/libc/test/include/stdbit_stub.h +++ b/libc/test/include/stdbit_stub.h @@ -17,21 +17,11 @@ #include // bool in C #define STDBIT_STUB_FUNCTION(FUNC_NAME, LEADING_VAL) \ - unsigned FUNC_NAME##_uc(unsigned char x) __NOEXCEPT { \ - return LEADING_VAL##AU; \ - } \ - unsigned FUNC_NAME##_us(unsigned short x) __NOEXCEPT { \ - return LEADING_VAL##BU; \ - } \ - unsigned FUNC_NAME##_ui(unsigned int x) __NOEXCEPT { \ - return LEADING_VAL##CU; \ - } \ - unsigned FUNC_NAME##_ul(unsigned long x) __NOEXCEPT { \ - return LEADING_VAL##DU; \ - } \ - unsigned FUNC_NAME##_ull(unsigned long long x) __NOEXCEPT { \ - return LEADING_VAL##EU; \ - } + unsigned FUNC_NAME##_uc(unsigned char x) { return LEADING_VAL##AU; } \ + unsigned FUNC_NAME##_us(unsigned short x) { return LEADING_VAL##BU; } \ + unsigned FUNC_NAME##_ui(unsigned int x) { return LEADING_VAL##CU; } \ + unsigned FUNC_NAME##_ul(unsigned long x) { return LEADING_VAL##DU; } \ + unsigned FUNC_NAME##_ull(unsigned long long x) { return LEADING_VAL##EU; } __BEGIN_C_DECLS @@ -46,28 +36,24 @@ STDBIT_STUB_FUNCTION(stdc_first_trailing_one, 0x1) STDBIT_STUB_FUNCTION(stdc_count_zeros, 0x2) STDBIT_STUB_FUNCTION(stdc_count_ones, 0x3) -bool stdc_has_single_bit_uc(unsigned char x) __NOEXCEPT { return false; } -bool stdc_has_single_bit_us(unsigned short x) __NOEXCEPT { return false; } -bool stdc_has_single_bit_ui(unsigned x) __NOEXCEPT { return false; } -bool stdc_has_single_bit_ul(unsigned long x) __NOEXCEPT { return false; } -bool stdc_has_single_bit_ull(unsigned long long x) __NOEXCEPT { return false; } +bool stdc_has_single_bit_uc(unsigned char x) { return false; } +bool stdc_has_single_bit_us(unsigned short x) { return false; } +bool stdc_has_single_bit_ui(unsigned x) { return 
false; } +bool stdc_has_single_bit_ul(unsigned long x) { return false; } +bool stdc_has_single_bit_ull(unsigned long long x) { return false; } STDBIT_STUB_FUNCTION(stdc_bit_width, 0x4) -unsigned char stdc_bit_floor_uc(unsigned char x) __NOEXCEPT { return 0x5AU; } -unsigned short stdc_bit_floor_us(unsigned short x) __NOEXCEPT { return 0x5BU; } -unsigned stdc_bit_floor_ui(unsigned x) __NOEXCEPT { return 0x5CU; } -unsigned long stdc_bit_floor_ul(unsigned long x) __NOEXCEPT { return 0x5DUL; } -unsigned long long stdc_bit_floor_ull(unsigned long long x) __NOEXCEPT { - return 0x5EULL; -} - -unsigned char stdc_bit_ceil_uc(unsigned char x) __NOEXCEPT { return 0x6AU; } -unsigned short stdc_bit_ceil_us(unsigned short x) __NOEXCEPT { return 0x6BU; } -unsigned stdc_bit_ceil_ui(unsigned x) __NOEXCEPT { return 0x6CU; } -unsigned long stdc_bit_ceil_ul(unsigned long x) __NOEXCEPT { return 0x6DUL; } -unsigned long long stdc_bit_ceil_ull(unsigned long long x) __NOEXCEPT { - return 0x6EULL; -} +unsigned char stdc_bit_floor_uc(unsigned char x) { return 0x5AU; } +unsigned short stdc_bit_floor_us(unsigned short x) { return 0x5BU; } +unsigned stdc_bit_floor_ui(unsigned x) { return 0x5CU; } +unsigned long stdc_bit_floor_ul(unsigned long x) { return 0x5DUL; } +unsigned long long stdc_bit_floor_ull(unsigned long long x) { return 0x5EULL; } + +unsigned char stdc_bit_ceil_uc(unsigned char x) { return 0x6AU; } +unsigned short stdc_bit_ceil_us(unsigned short x) { return 0x6BU; } +unsigned stdc_bit_ceil_ui(unsigned x) { return 0x6CU; } +unsigned long stdc_bit_ceil_ul(unsigned long x) { return 0x6DUL; } +unsigned long long stdc_bit_ceil_ull(unsigned long long x) { return 0x6EULL; } __END_C_DECLS From 958de20b30e73d898cf538435da5bab42ffd4987 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 17 Dec 2024 15:31:07 -0600 Subject: [PATCH 30/35] [libc] Enable 'timespec_get' for the GPU build (#120304) Summary: Currently fails to build libc++ because this is missing. 
--- libc/config/gpu/entrypoints.txt | 1 + libc/src/__support/time/gpu/time_utils.h | 2 ++ libc/src/time/gpu/nanosleep.cpp | 2 -- libc/src/time/gpu/timespec_get.cpp | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index e3ebd3ca47256..28317c656d4be 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -257,6 +257,7 @@ set(TARGET_LIBC_ENTRYPOINTS # time.h entrypoints libc.src.time.clock libc.src.time.clock_gettime + libc.src.time.timespec_get libc.src.time.nanosleep # wchar.h entrypoints diff --git a/libc/src/__support/time/gpu/time_utils.h b/libc/src/__support/time/gpu/time_utils.h index c631a38d91ba2..315506c897dcf 100644 --- a/libc/src/__support/time/gpu/time_utils.h +++ b/libc/src/__support/time/gpu/time_utils.h @@ -38,6 +38,8 @@ extern gpu::Constant __llvm_libc_clock_freq; #error "Unsupported target" #endif +constexpr uint64_t TICKS_PER_SEC = 1000000000UL; + } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_TIME_GPU_TIME_UTILS_H diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp index 25a22d5703fa7..a92f660f225cb 100644 --- a/libc/src/time/gpu/nanosleep.cpp +++ b/libc/src/time/gpu/nanosleep.cpp @@ -13,8 +13,6 @@ namespace LIBC_NAMESPACE_DECL { -constexpr uint64_t TICKS_PER_SEC = 1000000000UL; - LLVM_LIBC_FUNCTION(int, nanosleep, (const struct timespec *req, struct timespec *rem)) { if (!GPU_CLOCKS_PER_SEC || !req) diff --git a/libc/src/time/gpu/timespec_get.cpp b/libc/src/time/gpu/timespec_get.cpp index f4ef328a83122..0dd128444aa8e 100644 --- a/libc/src/time/gpu/timespec_get.cpp +++ b/libc/src/time/gpu/timespec_get.cpp @@ -10,6 +10,7 @@ #include "hdr/time_macros.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/time/gpu/time_utils.h" namespace LIBC_NAMESPACE_DECL { From eb59fe8d04a594da4ac0cdba2bb2ca828adcf833 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: 
Tue, 17 Dec 2024 21:32:40 +0000 Subject: [PATCH 31/35] [VPlan] Remove redundant assignment in VPReductionPHIRecipe (NFC) Suggested post-commit for 0e528ac404e13ed2d952a2d83aaf8383293c851e. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7239ecefbde56..7f8c560270bc0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3413,7 +3413,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { // TODO: The sentinel value is not always necessary. When the start value is // a constant, and smaller than the start value of the induction variable, // the start value can be directly used to initialize the reduction phi. - StartV = Iden = StartV; + Iden = StartV; if (!ScalarPHI) { IRBuilderBase::InsertPointGuard IPBuilder(Builder); Builder.SetInsertPoint(VectorPH->getTerminator()); From 97b7bace67c4fb7d62892f5bc6d7614a65d0fb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 17 Dec 2024 13:41:24 -0800 Subject: [PATCH 32/35] [flang][cuda] Allow host array with PARAMETER attribute in device context (#120298) Host arrays are normally not allowed in device context unless they have a `PARAMETER` attribute. This patch updates the check so no error is emitted.
--- flang/lib/Semantics/check-cuda.cpp | 1 + flang/test/Semantics/cuf09.cuf | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index 9c044a47c7983..d8c9db2241739 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -110,6 +110,7 @@ struct FindHostArray if (const auto *details{ symbol.GetUltimate().detailsIf()}) { if (details->IsArray() && + !symbol.attrs().test(Fortran::semantics::Attr::PARAMETER) && (!details->cudaDataAttr() || (details->cudaDataAttr() && *details->cudaDataAttr() != common::CUDADataAttr::Device && diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf index e0ca814aec26a..e2247da961f7e 100644 --- a/flang/test/Semantics/cuf09.cuf +++ b/flang/test/Semantics/cuf09.cuf @@ -1,6 +1,7 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 module m integer :: m(100) + integer, parameter :: p(5) = [1,2,3,4,5] contains attributes(device) subroutine devsub !ERROR: Statement may not appear in device code @@ -23,6 +24,12 @@ module m if (i .le. N) a(i) = m(i) end subroutine + attributes(global) subroutine hostparameter(a) + integer :: a(*) + i = threadIdx%x + if (i .le. N) a(i) = p(i) ! ok. p is parameter + end subroutine + attributes(global) subroutine localarray() integer :: a(10) i = threadIdx%x From 15c61a208ffdd0bfe6add667cf9e56df1fdbf16e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 17 Dec 2024 13:42:14 -0800 Subject: [PATCH 33/35] [flang][cuda] Do not consider SHARED array as host array (#120306) Update the current `FindHostArray` so that it does not return shared arrays as host arrays.
--- flang/lib/Semantics/check-cuda.cpp | 1 + flang/test/Semantics/cuf09.cuf | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index d8c9db2241739..15ee5f32f69b1 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -115,6 +115,7 @@ struct FindHostArray (details->cudaDataAttr() && *details->cudaDataAttr() != common::CUDADataAttr::Device && *details->cudaDataAttr() != common::CUDADataAttr::Managed && + *details->cudaDataAttr() != common::CUDADataAttr::Shared && *details->cudaDataAttr() != common::CUDADataAttr::Unified))) { return &symbol; } diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf index e2247da961f7e..7452ee7cd7dc6 100644 --- a/flang/test/Semantics/cuf09.cuf +++ b/flang/test/Semantics/cuf09.cuf @@ -35,6 +35,13 @@ module m i = threadIdx%x a(i) = i end subroutine + + attributes(global) subroutine sharedarray(a) + integer, device :: a(10) + integer, shared :: s(10) + i = threadIdx%x + a(i) = s(10) ! ok, a is device and s is shared + end subroutine end program main From 9f231a85004fad080980e80ef881c81d1d5bb60e Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Tue, 17 Dec 2024 13:49:31 -0800 Subject: [PATCH 34/35] [NVPTX] Prefer ValueType when defining DAG patterns (NFC) (#120161) Replace uses of register class in dag patterns with value types. These types are much more concise and in cases where a single register class maps to multiple types, they avoid the need for both. 
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 948 +++++++++++------------ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 564 +++++++------- 2 files changed, 756 insertions(+), 756 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index a7836ccc45f47..abaf8e0b0ec1f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -213,33 +213,33 @@ multiclass I3 { def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + [(set i64:$dst, (OpNode i64:$a, i64:$b))]>; def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + [(set i64:$dst, (OpNode i64:$a, imm:$b))]>; def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>; + [(set i32:$dst, (OpNode i32:$a, i32:$b))]>; def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>; + [(set i32:$dst, (OpNode i32:$a, imm:$b))]>; def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + [(set i16:$dst, (OpNode i16:$a, i16:$b))]>; def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; + [(set i16:$dst, (OpNode i16:$a, (imm):$b))]>; } class I16x2 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode 
(v2i16 Int32Regs:$a), (v2i16 Int32Regs:$b)))]>, + [(set v2i16:$dst, (OpNode v2i16:$a, v2i16:$b))]>, Requires<[hasPTX<80>, hasSM<90>]>; // Template for instructions which take 3 int args. The instructions are @@ -249,20 +249,20 @@ multiclass ADD_SUB_INT_CARRY { def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>; + [(set i32:$dst, (OpNode i32:$a, i32:$b))]>; def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>; + [(set i32:$dst, (OpNode i32:$a, imm:$b))]>; def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>, + [(set i64:$dst, (OpNode i64:$a, i64:$b))]>, Requires<[hasPTX<43>]>; def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>, + [(set i64:$dst, (OpNode i64:$a, imm:$b))]>, Requires<[hasPTX<43>]>; } } @@ -277,72 +277,72 @@ multiclass FMINIMUMMAXIMUM { NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b), !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + [(set f64:$dst, (OpNode f64:$a, f64:$b))]>; def f64ri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b), !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>; } def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (OpNode 
f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def f32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + [(set f32:$dst, (OpNode f32:$a, f32:$b))]>; def f32ri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>; def f16rr_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, + [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, + [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>; def f16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>; def f16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, hasSM<80>, 
hasPTX<70>]>; def bf16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, + [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; def bf16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, + [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>; } @@ -360,161 +360,161 @@ multiclass F3_fma_component { NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b), !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + [(set f64:$dst, (OpNode f64:$a, f64:$b))]>, Requires<[allowFMA]>; def f64ri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b), !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>, Requires<[allowFMA]>; def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (OpNode f32:$a, f32:$b))]>, Requires<[allowFMA, doF32FTZ]>; def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[allowFMA, doF32FTZ]>; def f32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (OpNode f32:$a, 
f32:$b))]>, Requires<[allowFMA]>; def f32ri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[allowFMA]>; def f16rr_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, + [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, + [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, allowFMA]>; def f16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, allowFMA]>; def bf16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, + [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, allowFMA]>; def bf16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, + [(set 
v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, allowFMA]>; // These have strange names so we don't perturb existing mir tests. def _rnf64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b), !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + [(set f64:$dst, (OpNode f64:$a, f64:$b))]>, Requires<[noFMA]>; def _rnf64ri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b), !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>, Requires<[noFMA]>; def _rnf32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (OpNode f32:$a, Float32Regs:$b))]>, Requires<[noFMA, doF32FTZ]>; def _rnf32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[noFMA, doF32FTZ]>; def _rnf32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (OpNode f32:$a, f32:$b))]>, Requires<[noFMA]>; def _rnf32ri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>, Requires<[noFMA]>; def _rnf16rr_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, 
+ [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, + [(set f16:$dst, (OpNode f16:$a, f16:$b))]>, Requires<[useFP16Math, noFMA]>; def _rnf16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>, Requires<[useFP16Math, noFMA]>; def _rnbf16rr_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.ftz.bf16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, + [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, noFMA, doF32FTZ]>; def _rnbf16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.bf16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>, + [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>, Requires<[hasBF16Math, noFMA]>; def _rnbf16x2rr_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.ftz.bf16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, + [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, noFMA, doF32FTZ]>; def _rnbf16x2rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), 
!strconcat(OpcStr, ".rn.bf16x2 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>, + [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>, Requires<[hasBF16Math, noFMA]>; } @@ -524,40 +524,40 @@ multiclass F3_fma_component { multiclass F2 { def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), !strconcat(OpcStr, ".f64 \t$dst, $a;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; + [(set f64:$dst, (OpNode f64:$a))]>; def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, + [(set f32:$dst, (OpNode f32:$a))]>, Requires<[doF32FTZ]>; def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), !strconcat(OpcStr, ".f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; + [(set f32:$dst, (OpNode f32:$a))]>; } multiclass F2_Support_Half { def bf16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), !strconcat(OpcStr, ".bf16 \t$dst, $a;"), - [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a)))]>, + [(set bf16:$dst, (OpNode bf16:$a))]>, Requires<[hasSM<80>, hasPTX<70>]>; def bf16x2 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), !strconcat(OpcStr, ".bf16x2 \t$dst, $a;"), - [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a)))]>, + [(set v2bf16:$dst, (OpNode v2bf16:$a))]>, Requires<[hasSM<80>, hasPTX<70>]>; def f16_ftz : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>, + [(set f16:$dst, (OpNode f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; def f16x2_ftz : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>; def f16 : NVPTXInst<(outs Int16Regs:$dst), (ins 
Int16Regs:$a), !strconcat(OpcStr, ".f16 \t$dst, $a;"), - [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a)))]>, + [(set f16:$dst, (OpNode f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; def f16x2 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), !strconcat(OpcStr, ".f16x2 \t$dst, $a;"), - [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a)))]>, + [(set v2f16:$dst, (OpNode v2f16:$a))]>, Requires<[hasSM<53>, hasPTX<65>]>; } @@ -731,13 +731,13 @@ def fpround_oneuse : PatFrag<(ops node:$a), (fpround node:$a), [{ return N->hasOneUse(); }]>; -def : Pat<(v2bf16 (build_vector (bf16 (fpround_oneuse Float32Regs:$lo)), - (bf16 (fpround_oneuse Float32Regs:$hi)))), +def : Pat<(v2bf16 (build_vector (bf16 (fpround_oneuse f32:$lo)), + (bf16 (fpround_oneuse f32:$hi)))), (CVT_bf16x2_f32 Float32Regs:$hi, Float32Regs:$lo, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>, hasBF16Math]>; -def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse Float32Regs:$lo)), - (f16 (fpround_oneuse Float32Regs:$hi)))), +def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse f32:$lo)), + (f16 (fpround_oneuse f32:$hi)))), (CVT_f16x2_f32 Float32Regs:$hi, Float32Regs:$lo, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>, useFP16Math]>; @@ -771,22 +771,22 @@ let hasSideEffects = false in { NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, Int1Regs:$p), !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T RC:$b)))]>; + [(set T:$dst, (select i1:$p, T:$a, T:$b))]>; def ri : NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, Int1Regs:$p), !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set (T RC:$dst), (select Int1Regs:$p, (T RC:$a), (T ImmNode:$b)))]>; + [(set T:$dst, (select i1:$p, T:$a, (T ImmNode:$b)))]>; def ir : NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, RC:$b, Int1Regs:$p), !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, (T RC:$b)))]>; + [(set T:$dst, (select i1:$p, ImmNode:$a, T:$b))]>; def ii : 
NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set (T RC:$dst), (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; + [(set T:$dst, (select i1:$p, ImmNode:$a, ImmNode:$b))]>; } } @@ -812,7 +812,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; // defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { -def : Pat<(vt (select Int1Regs:$p, (vt Int32Regs:$a), (vt Int32Regs:$b))), +def : Pat<(vt (select i1:$p, vt:$a, vt:$b)), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, Int1Regs:$p)>; } @@ -841,10 +841,10 @@ def TESTINF_f64i : NVPTXInst<(outs Int1Regs:$p), (ins f64imm:$a), multiclass ADD_SUB_i1 { def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + [(set i1:$dst, (OpNode i1:$a, i1:$b))]>; def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; + [(set i1:$dst, (OpNode i1:$a, (imm):$b))]>; } // int1 addition and subtraction are both just xor. 
@@ -885,7 +885,7 @@ defm UREM : I3<"rem.u", urem>; multiclass ABS { def : NVPTXInst<(outs RC:$dst), (ins RC:$a), !strconcat("abs", SizeName, " \t$dst, $a;"), - [(set (T RC:$dst), (abs (T RC:$a)))]>; + [(set T:$dst, (abs T:$a))]>; } defm ABS_16 : ABS; defm ABS_32 : ABS; @@ -954,26 +954,26 @@ def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)), (MULWIDES32 i16:$a, i16:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), +def : Pat<(i32 (mul_wide_signed i16:$a, imm:$b)), (MULWIDES32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), +def : Pat<(i32 (mul_wide_unsigned i16:$a, imm:$b)), (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_signed (i32 Int32Regs:$a), imm:$b)), +def : Pat<(i64 (mul_wide_signed i32:$a, imm:$b)), (MULWIDES64Imm Int32Regs:$a, imm:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)), (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned (i32 Int32Regs:$a), imm:$b)), +def : Pat<(i64 (mul_wide_unsigned i32:$a, imm:$b)), (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, Requires<[doMulWide]>; @@ -1023,46 +1023,46 @@ def SHL2MUL16 : SDNodeXForm; // Convert "sign/zero-extend, then shift left by an immediate" to mul.wide. 
-def : Pat<(shl (sext Int32Regs:$a), (i32 IntConst_0_30:$b)), +def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)), (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, Requires<[doMulWide]>; -def : Pat<(shl (zext Int32Regs:$a), (i32 IntConst_0_30:$b)), +def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)), (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, Requires<[doMulWide]>; -def : Pat<(shl (sext Int16Regs:$a), (i16 IntConst_0_14:$b)), +def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)), (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, Requires<[doMulWide]>; -def : Pat<(shl (zext Int16Regs:$a), (i16 IntConst_0_14:$b)), +def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)), (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, Requires<[doMulWide]>; // Convert "sign/zero-extend then multiply" to mul.wide. -def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), +def : Pat<(mul (sext i32:$a), (sext i32:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), +def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)), (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, Requires<[doMulWide]>; -def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), +def : Pat<(mul (zext i32:$a), (zext i32:$b)), (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), +def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)), (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, Requires<[doMulWide]>; -def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), +def : Pat<(mul (sext i16:$a), (sext i16:$b)), (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), +def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)), (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, Requires<[doMulWide]>; -def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), +def : Pat<(mul (zext 
i16:$a), (zext i16:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), +def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)), (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, Requires<[doMulWide]>; @@ -1078,77 +1078,77 @@ def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; + [(set i16:$dst, (imad i16:$a, i16:$b, i16:$c))]>; def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; + [(set i16:$dst, (imad i16:$a, i16:$b, imm:$c))]>; def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; + [(set i16:$dst, (imad i16:$a, imm:$b, i16:$c))]>; def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b, i16imm:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>; + [(set i16:$dst, (imad i16:$a, imm:$b, imm:$c))]>; def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>; + [(set i32:$dst, (imad i32:$a, i32:$b, i32:$c))]>; def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), imm:$c))]>; + [(set i32:$dst, (imad i32:$a, i32:$b, imm:$c))]>; def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set (i32 
Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, (i32 Int32Regs:$c)))]>; + [(set i32:$dst, (imad i32:$a, imm:$b, i32:$c))]>; def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b, i32imm:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, imm:$c))]>; + [(set i32:$dst, (imad i32:$a, imm:$b, imm:$c))]>; def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; + [(set i64:$dst, (imad i64:$a, i64:$b, i64:$c))]>; def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; + [(set i64:$dst, (imad i64:$a, i64:$b, imm:$c))]>; def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; + [(set i64:$dst, (imad i64:$a, imm:$b, i64:$c))]>; def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b, i64imm:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>; + [(set i64:$dst, (imad i64:$a, imm:$b, imm:$c))]>; def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "neg.s16 \t$dst, $src;", - [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; + [(set i16:$dst, (ineg i16:$src))]>; def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), "neg.s32 \t$dst, $src;", - [(set (i32 Int32Regs:$dst), (ineg (i32 Int32Regs:$src)))]>; + [(set i32:$dst, (ineg i32:$src))]>; def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), "neg.s64 \t$dst, $src;", - [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; + [(set i64:$dst, (ineg i64:$src))]>; //----------------------------------- // Floating Point Arithmetic @@ 
-1210,7 +1210,7 @@ defm FSQRT : F2<"sqrt.rn", fsqrt>; class FNEG_F16_F16X2 : NVPTXInst<(outs RC:$dst), (ins RC:$src), !strconcat(OpcStr, " \t$dst, $src;"), - [(set RC:$dst, (fneg (T RC:$src)))]>, + [(set T:$dst, (fneg T:$src))]>, Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>; def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>; @@ -1224,7 +1224,7 @@ def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>; class FNEG_BF16_F16X2 : NVPTXInst<(outs RC:$dst), (ins RC:$src), !strconcat(OpcStr, " \t$dst, $src;"), - [(set RC:$dst, (fneg (T RC:$src)))]>, + [(set T:$dst, (fneg T:$src))]>, Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>; def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>; def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>; @@ -1238,21 +1238,21 @@ def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$a, Float64Regs:$b), "rcp.rn.f64 \t$dst, $b;", - [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>; + [(set f64:$dst, (fdiv DoubleConst1:$a, f64:$b))]>; def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b), "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>; + [(set f64:$dst, (fdiv f64:$a, f64:$b))]>; def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b), "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>; + [(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>; // fdiv will be converted to rcp // fneg (fdiv 1.0, X) => fneg (rcp.rn X) -def : Pat<(fdiv DoubleConstNeg1:$a, Float64Regs:$b), +def : Pat<(fdiv DoubleConstNeg1:$a, f64:$b), (FNEGf64 (FDIV641r (NegDoubleConst node:$a), Float64Regs:$b))>; // @@ -1262,13 +1262,13 @@ def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b), "rcp.approx.ftz.f32 \t$dst, $b;", - [(set 
Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>, Requires<[do_DIVF32_APPROX, doF32FTZ]>; def FDIV321r : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b), "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>, Requires<[do_DIVF32_APPROX]>; // // F32 Approximate division @@ -1277,25 +1277,25 @@ def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv f32:$a, f32:$b))]>, Requires<[do_DIVF32_APPROX, doF32FTZ]>; def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>, Requires<[do_DIVF32_APPROX, doF32FTZ]>; def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv f32:$a, f32:$b))]>, Requires<[do_DIVF32_APPROX]>; def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>, Requires<[do_DIVF32_APPROX]>; // // F32 Semi-accurate reciprocal @@ -1306,13 +1306,13 @@ def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b), "rcp.approx.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>, Requires<[do_DIVF32_FULL, doF32FTZ]>; def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, 
Float32Regs:$b), "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>, Requires<[do_DIVF32_FULL]>; // // F32 Semi-accurate division @@ -1321,25 +1321,25 @@ def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv Float32Regs:$a, f32:$b))]>, Requires<[do_DIVF32_FULL, doF32FTZ]>; def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>, Requires<[do_DIVF32_FULL, doF32FTZ]>; def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv f32:$a, f32:$b))]>, Requires<[do_DIVF32_FULL]>; def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>, Requires<[do_DIVF32_FULL]>; // // F32 Accurate reciprocal @@ -1348,13 +1348,13 @@ def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b), "rcp.rn.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>, Requires<[doF32FTZ]>; def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$a, Float32Regs:$b), "rcp.rn.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>; + [(set f32:$dst, (fdiv FloatConst1:$a, f32:$b))]>; // // F32 Accurate division // @@ -1362,24 +1362,24 @@ def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins 
Float32Regs:$a, Float32Regs:$b), "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + [(set f32:$dst, (fdiv f32:$a, f32:$b))]>, Requires<[doF32FTZ]>; def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>, Requires<[doF32FTZ]>; def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b), "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>; + [(set f32:$dst, (fdiv f32:$a, f32:$b))]>; def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b), "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>; + [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>; // // FMA @@ -1410,14 +1410,14 @@ multiclass FMA multiclass FMA_F16 { def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>, + [(set T:$dst, (fma T:$a, T:$b, T:$c))]>, Requires<[useFP16Math, Pred]>; } multiclass FMA_BF16 { def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>, + [(set T:$dst, (fma T:$a, T:$b, T:$c))]>, Requires<[hasBF16Math, Pred]>; } @@ -1434,11 +1434,11 @@ defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>; // sin/cos def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "sin.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>, + [(set f32:$dst, (fsin f32:$src))]>, Requires<[allowUnsafeFPMath]>; def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "cos.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>, + [(set f32:$dst, (fcos 
f32:$src))]>, Requires<[allowUnsafeFPMath]>; // Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)), @@ -1446,25 +1446,25 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), // semantics of LLVM's frem. // frem - f32 FTZ -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), +def : Pat<(frem f32:$x, f32:$y), (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ), Float32Regs:$y))>, Requires<[doF32FTZ, allowUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), +def : Pat<(frem f32:$x, fpimm:$y), (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ), fpimm:$y))>, Requires<[doF32FTZ, allowUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), +def : Pat<(frem f32:$x, Float32Regs:$y), (SELP_f32rr Float32Regs:$x, (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ), Float32Regs:$y)), (TESTINF_f32r Float32Regs:$y))>, Requires<[doF32FTZ, noUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), +def : Pat<(frem f32:$x, fpimm:$y), (SELP_f32rr Float32Regs:$x, (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ), @@ -1473,25 +1473,25 @@ def : Pat<(frem Float32Regs:$x, fpimm:$y), Requires<[doF32FTZ, noUnsafeFPMath]>; // frem - f32 -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), +def : Pat<(frem f32:$x, f32:$y), (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI), Float32Regs:$y))>, Requires<[allowUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), +def : Pat<(frem f32:$x, fpimm:$y), (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI), fpimm:$y))>, Requires<[allowUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), +def : Pat<(frem f32:$x, f32:$y), 
(SELP_f32rr Float32Regs:$x, (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI), Float32Regs:$y)), (TESTINF_f32r Float32Regs:$y))>, Requires<[noUnsafeFPMath]>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), +def : Pat<(frem f32:$x, fpimm:$y), (SELP_f32rr Float32Regs:$x, (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI), @@ -1500,25 +1500,25 @@ def : Pat<(frem Float32Regs:$x, fpimm:$y), Requires<[noUnsafeFPMath]>; // frem - f64 -def : Pat<(frem Float64Regs:$x, Float64Regs:$y), +def : Pat<(frem f64:$x, f64:$y), (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI), Float64Regs:$y))>, Requires<[allowUnsafeFPMath]>; -def : Pat<(frem Float64Regs:$x, fpimm:$y), +def : Pat<(frem f64:$x, fpimm:$y), (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI), fpimm:$y))>, Requires<[allowUnsafeFPMath]>; -def : Pat<(frem Float64Regs:$x, Float64Regs:$y), +def : Pat<(frem f64:$x, f64:$y), (SELP_f64rr Float64Regs:$x, (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI), Float64Regs:$y)), (TESTINF_f64r Float64Regs:$y))>, Requires<[noUnsafeFPMath]>; -def : Pat<(frem Float64Regs:$x, fpimm:$y), +def : Pat<(frem f64:$x, fpimm:$y), (SELP_f64rr Float64Regs:$x, (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI), @@ -1536,35 +1536,35 @@ multiclass BITWISE { def b1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + [(set i1:$dst, (OpNode i1:$a, i1:$b))]>; def b1ri : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; + [(set i1:$dst, (OpNode i1:$a, imm:$b))]>; def b16rr : 
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + [(set i16:$dst, (OpNode i16:$a, i16:$b))]>; def b16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + [(set i16:$dst, (OpNode i16:$a, imm:$b))]>; def b32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>; + [(set i32:$dst, (OpNode i32:$a, i32:$b))]>; def b32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>; + [(set i32:$dst, (OpNode i32:$a, imm:$b))]>; def b64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + [(set i64:$dst, (OpNode i64:$a, i64:$b))]>; def b64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + [(set i64:$dst, (OpNode i64:$a, imm:$b))]>; } defm OR : BITWISE<"or", or>; @@ -1572,46 +1572,46 @@ defm AND : BITWISE<"and", and>; defm XOR : BITWISE<"xor", xor>; // PTX does not support mul on predicates, convert to and instructions -def : Pat<(mul Int1Regs:$a, Int1Regs:$b), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>; -def : Pat<(mul Int1Regs:$a, (i1 imm:$b)), (ANDb1ri Int1Regs:$a, imm:$b)>; +def : Pat<(mul i1:$a, i1:$b), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(mul i1:$a, imm:$b), (ANDb1ri Int1Regs:$a, imm:$b)>; // These transformations were once reliably performed by instcombine, but thanks // to poison semantics they are no longer safe for LLVM IR, 
perform them here // instead. -def : Pat<(select Int1Regs:$a, Int1Regs:$b, 0), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>; -def : Pat<(select Int1Regs:$a, 1, Int1Regs:$b), (ORb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(select i1:$a, i1:$b, 0), (ANDb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(select i1:$a, 1, i1:$b), (ORb1rr Int1Regs:$a, Int1Regs:$b)>; // Lower logical v2i16/v4i8 ops as bitwise ops on b32. foreach vt = [v2i16, v4i8] in { - def: Pat<(or (vt Int32Regs:$a), (vt Int32Regs:$b)), + def: Pat<(or vt:$a, vt:$b), (ORb32rr Int32Regs:$a, Int32Regs:$b)>; - def: Pat<(xor (vt Int32Regs:$a), (vt Int32Regs:$b)), + def: Pat<(xor vt:$a, vt:$b), (XORb32rr Int32Regs:$a, Int32Regs:$b)>; - def: Pat<(and (vt Int32Regs:$a), (vt Int32Regs:$b)), + def: Pat<(and vt:$a, vt:$b), (ANDb32rr Int32Regs:$a, Int32Regs:$b)>; // The constants get legalized into a bitcast from i32, so that's what we need // to match here. - def: Pat<(or Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + def: Pat<(or vt:$a, (vt (bitconvert (i32 imm:$b)))), (ORb32ri Int32Regs:$a, imm:$b)>; - def: Pat<(xor Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + def: Pat<(xor vt:$a, (vt (bitconvert (i32 imm:$b)))), (XORb32ri Int32Regs:$a, imm:$b)>; - def: Pat<(and Int32Regs:$a, (vt (bitconvert (i32 imm:$b)))), + def: Pat<(and vt:$a, (vt (bitconvert (i32 imm:$b)))), (ANDb32ri Int32Regs:$a, imm:$b)>; } def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), "not.pred \t$dst, $src;", - [(set Int1Regs:$dst, (not Int1Regs:$src))]>; + [(set i1:$dst, (not i1:$src))]>; def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "not.b16 \t$dst, $src;", - [(set Int16Regs:$dst, (not Int16Regs:$src))]>; + [(set i16:$dst, (not i16:$src))]>; def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), "not.b32 \t$dst, $src;", - [(set (i32 Int32Regs:$dst), (not (i32 Int32Regs:$src)))]>; + [(set i32:$dst, (not i32:$src))]>; def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), "not.b64 \t$dst, $src;", - 
[(set Int64Regs:$dst, (not Int64Regs:$src))]>; + [(set i64:$dst, (not i64:$src))]>; // Template for left/right shifts. Takes three operands, // [dest (reg), src (reg), shift (reg or imm)]. @@ -1622,31 +1622,31 @@ multiclass SHIFT { def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 Int32Regs:$b)))]>; + [(set i64:$dst, (OpNode i64:$a, i32:$b))]>; def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>; + [(set i64:$dst, (OpNode i64:$a, (i32 imm:$b)))]>; def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>; + [(set i32:$dst, (OpNode i32:$a, i32:$b))]>; def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 imm:$b)))]>; + [(set i32:$dst, (OpNode i32:$a, (i32 imm:$b)))]>; def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; + [(set i32:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 Int32Regs:$b)))]>; + [(set i16:$dst, (OpNode i16:$a, i32:$b))]>; def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; + [(set i16:$dst, (OpNode i16:$a, (i32 imm:$b)))]>; } defm SHL : SHIFT<"shl.b", shl>; @@ -1657,11 +1657,11 @@ defm SRL : SHIFT<"shr.u", srl>; def BREV32 : 
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), "brev.b32 \t$dst, $a;", - [(set Int32Regs:$dst, (bitreverse (i32 Int32Regs:$a)))]>; + [(set i32:$dst, (bitreverse i32:$a))]>; def BREV64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a), "brev.b64 \t$dst, $a;", - [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; + [(set i64:$dst, (bitreverse i64:$a))]>; // @@ -1694,17 +1694,17 @@ multiclass BFE { : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), !strconcat(Instr, " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>; + [(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>; def rri : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c), !strconcat(Instr, " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (bfe (T RC:$a), (i32 Int32Regs:$b), (i32 imm:$c)))]>; + [(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>; def rii : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c), !strconcat(Instr, " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (bfe (T RC:$a), (i32 imm:$b), (i32 imm:$c)))]>; + [(set T:$d, (bfe T:$a, imm:$b, imm:$c))]>; } multiclass BFI { @@ -1712,32 +1712,32 @@ multiclass BFI { : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + [(set T:$f, (bfi T:$a, T:$b, i32:$c, i32:$d))]>; def rrri : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + [(set T:$f, (bfi T:$a, T:$b, i32:$c, imm:$d))]>; def rrii : NVPTXInst<(outs RC:$f), (ins RC:$a, RC:$b, i32imm:$c, i32imm:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T RC:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; + [(set T:$f, (bfi T:$a, T:$b, imm:$c, imm:$d))]>; def irrr : NVPTXInst<(outs RC:$f), (ins ImmCls:$a, RC:$b, Int32Regs:$c, 
Int32Regs:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 Int32Regs:$d)))]>; + [(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, i32:$d))]>; def irri : NVPTXInst<(outs RC:$f), (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 Int32Regs:$c), (i32 imm:$d)))]>; + [(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, imm:$d))]>; def irii : NVPTXInst<(outs RC:$f), (ins ImmCls:$a, RC:$b, i32imm:$c, i32imm:$d), !strconcat(Instr, " \t$f, $a, $b, $c, $d;"), - [(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>; + [(set T:$f, (bfi (T imm:$a), T:$b, imm:$c, imm:$d))]>; } def Hexu32imm : Operand { @@ -1749,17 +1749,17 @@ multiclass PRMT { : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>; + [(set T:$d, (prmt T:$a, T:$b, i32:$c, imm:$mode))]>; def rri : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Hexu32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>; + [(set T:$d, (prmt T:$a, T:$b, imm:$c, imm:$mode))]>; def rii : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, Hexu32imm:$c, PrmtMode:$mode), !strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"), - [(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>; + [(set T:$d, (prmt T:$a, imm:$b, imm:$c, imm:$mode))]>; } let hasSideEffects = false in { @@ -1780,34 +1780,34 @@ let hasSideEffects = false in { // byte extraction + signed/unsigned extension to i32. 
-def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), i8)), +def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)), (BFE_S32rri Int32Regs:$s, Int32Regs:$o, 8)>; -def : Pat<(i32 (sext_inreg (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), i8)), +def : Pat<(i32 (sext_inreg (bfe i32:$s, imm:$o, 8), i8)), (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; -def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 Int32Regs:$o), 8), 255)), +def : Pat<(i32 (and (bfe i32:$s, i32:$o, 8), 255)), (BFE_U32rri Int32Regs:$s, Int32Regs:$o, 8)>; -def : Pat<(i32 (and (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8), 255)), +def : Pat<(i32 (and (bfe i32:$s, imm:$o, 8), 255)), (BFE_U32rii Int32Regs:$s, imm:$o, 8)>; // byte extraction + signed extension to i16 -def : Pat<(i16 (sext_inreg (trunc (bfe (i32 Int32Regs:$s), (i32 imm:$o), 8)), i8)), - (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>; +def : Pat<(i16 (sext_inreg (trunc (bfe i32:$s, imm:$o, 8)), i8)), + (CVT_s8_s32 (BFE_S32rii i32:$s, imm:$o, 8), CvtNONE)>; // Byte extraction via shift/trunc/sext -def : Pat<(i16 (sext_inreg (trunc Int32Regs:$s), i8)), +def : Pat<(i16 (sext_inreg (trunc i32:$s), i8)), (CVT_s8_s32 Int32Regs:$s, CvtNONE)>; -def : Pat<(i16 (sext_inreg (trunc (srl (i32 Int32Regs:$s), (i32 imm:$o))), i8)), +def : Pat<(i16 (sext_inreg (trunc (srl i32:$s, (i32 imm:$o))), i8)), (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, imm:$o, 8), CvtNONE)>; -def : Pat<(sext_inreg (srl (i32 Int32Regs:$s), (i32 imm:$o)), i8), +def : Pat<(sext_inreg (srl i32:$s, (i32 imm:$o)), i8), (BFE_S32rii Int32Regs:$s, imm:$o, 8)>; -def : Pat<(i16 (sra (i16 (trunc Int32Regs:$s)), (i32 8))), +def : Pat<(i16 (sra (i16 (trunc i32:$s)), (i32 8))), (CVT_s8_s32 (BFE_S32rii Int32Regs:$s, 8, 8), CvtNONE)>; -def : Pat<(sext_inreg (srl (i64 Int64Regs:$s), (i32 imm:$o)), i8), +def : Pat<(sext_inreg (srl i64:$s, (i32 imm:$o)), i8), (BFE_S64rii Int64Regs:$s, imm:$o, 8)>; -def : Pat<(i16 (sext_inreg (trunc Int64Regs:$s), i8)), +def : Pat<(i16 (sext_inreg (trunc 
i64:$s), i8)), (CVT_s8_s64 Int64Regs:$s, CvtNONE)>; -def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8)), +def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)), (CVT_s8_s64 (BFE_S64rii Int64Regs:$s, imm:$o, 8), CvtNONE)>; //----------------------------------- @@ -1948,10 +1948,10 @@ def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; // Load a memory address into a u32 or u64 register. def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), "mov.u32 \t$dst, $a;", - [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; + [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), "mov.u64 \t$dst, $a;", - [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; + [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; // Get pointer to local stack. let hasSideEffects = false in { @@ -1993,16 +1993,16 @@ let IsSimpleMove=1, hasSideEffects=0 in { def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), "mov.pred \t$dst, $src;", - [(set Int1Regs:$dst, imm:$src)]>; + [(set i1:$dst, imm:$src)]>; def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), "mov.u16 \t$dst, $src;", - [(set Int16Regs:$dst, imm:$src)]>; + [(set i16:$dst, imm:$src)]>; def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), "mov.u32 \t$dst, $src;", - [(set (i32 Int32Regs:$dst), imm:$src)]>; + [(set i32:$dst, imm:$src)]>; def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), "mov.u64 \t$dst, $src;", - [(set Int64Regs:$dst, imm:$src)]>; + [(set i64:$dst, imm:$src)]>; def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), "mov.b16 \t$dst, $src;", []>; @@ -2013,10 +2013,10 @@ def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), "mov.f32 \t$dst, $src;", - [(set Float32Regs:$dst, fpimm:$src)]>; + [(set f32:$dst, fpimm:$src)]>; def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins 
f64imm:$src), "mov.f64 \t$dst, $src;", - [(set Float64Regs:$dst, fpimm:$src)]>; + [(set f64:$dst, fpimm:$src)]>; def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>; @@ -2024,10 +2024,10 @@ def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>; //---- Copy Frame Index ---- def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), "add.u32 \t$dst, ${addr:add};", - [(set Int32Regs:$dst, ADDRri:$addr)]>; + [(set i32:$dst, ADDRri:$addr)]>; def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), "add.u64 \t$dst, ${addr:add};", - [(set Int64Regs:$dst, ADDRri64:$addr)]>; + [(set i64:$dst, ADDRri64:$addr)]>; //----------------------------------- // Comparison and Selection @@ -2055,45 +2055,45 @@ multiclass ISET_FORMAT pred def : Pat<(i1 (OpNode i16:$a, i16:$b)), (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), + def : Pat<(i1 (OpNode i16:$a, imm:$b)), (setp_16ri Int16Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), + def : Pat<(i1 (OpNode imm:$a, i16:$b)), (setp_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> pred def : Pat<(i1 (OpNode i32:$a, i32:$b)), (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; - def : Pat<(i1 (OpNode (i32 Int32Regs:$a), imm:$b)), + def : Pat<(i1 (OpNode i32:$a, imm:$b)), (setp_32ri Int32Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, (i32 Int32Regs:$b))), + def : Pat<(i1 (OpNode imm:$a, i32:$b)), (setp_32ir imm:$a, Int32Regs:$b, Mode)>; // i64 -> pred - def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), + def : Pat<(i1 (OpNode i64:$a, i64:$b)), (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), + def : Pat<(i1 (OpNode i64:$a, imm:$b)), (setp_64ri Int64Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), + def : Pat<(i1 (OpNode imm:$a, i64:$b)), (setp_64ir imm:$a, 
Int64Regs:$b, Mode)>; // i16 -> i32 def : Pat<(i32 (OpNode i16:$a, i16:$b)), (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), + def : Pat<(i32 (OpNode i16:$a, imm:$b)), (set_16ri Int16Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), + def : Pat<(i32 (OpNode imm:$a, i16:$b)), (set_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> i32 def : Pat<(i32 (OpNode i32:$a, i32:$b)), (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; - def : Pat<(i32 (OpNode (i32 Int32Regs:$a), imm:$b)), + def : Pat<(i32 (OpNode i32:$a, imm:$b)), (set_32ri Int32Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, (i32 Int32Regs:$b))), + def : Pat<(i32 (OpNode imm:$a, i32:$b)), (set_32ir imm:$a, Int32Regs:$b, Mode)>; // i64 -> i32 - def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), + def : Pat<(i32 (OpNode i64:$a, Int64Regs:$b)), (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), + def : Pat<(i32 (OpNode i64:$a, imm:$b)), (set_64ri Int64Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), + def : Pat<(i32 (OpNode imm:$a, i64:$b)), (set_64ir imm:$a, Int64Regs:$b, Mode)>; } @@ -2200,142 +2200,142 @@ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)), (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>; // i1 compare -> i32 -def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), +def : Pat<(i32 (setne i1:$a, i1:$b)), (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; -def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), +def : Pat<(i32 (setne i1:$a, i1:$b)), (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; multiclass FSET_FORMAT { // f16 -> pred - def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + def : Pat<(i1 (OpNode f16:$a, f16:$b)), (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + def : 
Pat<(i1 (OpNode f16:$a, f16:$b)), (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i1 (OpNode f16:$a, fpimm:$b)), (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i1 (OpNode f16:$a, fpimm:$b)), (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + def : Pat<(i1 (OpNode fpimm:$a, f16:$b)), (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + def : Pat<(i1 (OpNode fpimm:$a, f16:$b)), (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // bf16 -> pred - def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), + def : Pat<(i1 (OpNode bf16:$a, bf16:$b)), (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[hasBF16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), + def : Pat<(i1 (OpNode bf16:$a, bf16:$b)), (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[hasBF16Math]>; - def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)), (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>, Requires<[hasBF16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)), (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>, Requires<[hasBF16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))), + def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)), (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[hasBF16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))), + def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)), (SETP_bf16rr 
(LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[hasBF16Math]>; // f32 -> pred - def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + def : Pat<(i1 (OpNode f32:$a, f32:$b)), (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + def : Pat<(i1 (OpNode f32:$a, f32:$b)), (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + def : Pat<(i1 (OpNode f32:$a, fpimm:$b)), (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + def : Pat<(i1 (OpNode fpimm:$a, f32:$b)), (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + def : Pat<(i1 (OpNode fpimm:$a, f32:$b)), (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; // f64 -> pred - def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)), + def : Pat<(i1 (OpNode f64:$a, f64:$b)), (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), + def : Pat<(i1 (OpNode f64:$a, fpimm:$b)), (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), + def : Pat<(i1 (OpNode fpimm:$a, f64:$b)), (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; // f16 -> i32 - def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + def : Pat<(i32 (OpNode f16:$a, f16:$b)), (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + def : Pat<(i32 (OpNode f16:$a, f16:$b)), (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i32 (OpNode f16:$a, fpimm:$b)), (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, 
Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i32 (OpNode f16:$a, fpimm:$b)), (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + def : Pat<(i32 (OpNode fpimm:$a, f16:$b)), (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + def : Pat<(i32 (OpNode fpimm:$a, f16:$b)), (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // bf16 -> i32 - def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), + def : Pat<(i32 (OpNode bf16:$a, bf16:$b)), (SET_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[hasBF16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), + def : Pat<(i32 (OpNode bf16:$a, bf16:$b)), (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[hasBF16Math]>; - def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)), (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>, Requires<[hasBF16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)), + def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)), (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>, Requires<[hasBF16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))), + def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)), (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[hasBF16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))), + def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)), (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[hasBF16Math]>; // f32 -> i32 - def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + def : Pat<(i32 (OpNode f32:$a, f32:$b)), (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def 
: Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + def : Pat<(i32 (OpNode f32:$a, f32:$b)), (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + def : Pat<(i32 (OpNode f32:$a, fpimm:$b)), (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + def : Pat<(i32 (OpNode f32:$a, fpimm:$b)), (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + def : Pat<(i32 (OpNode fpimm:$a, f32:$b)), (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + def : Pat<(i32 (OpNode fpimm:$a, f32:$b)), (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; // f64 -> i32 - def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), + def : Pat<(i32 (OpNode f64:$a, f64:$b)), (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), + def : Pat<(i32 (OpNode f64:$a, fpimm:$b)), (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), + def : Pat<(i32 (OpNode fpimm:$a, f64:$b)), (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; } @@ -2714,7 +2714,7 @@ def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", [(CallVoid i32:$addr)]>; def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", - [(CallVoid Int64Regs:$addr)]>; + [(CallVoid i64:$addr)]>; def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", [(Prototype (i32 imm:$val))]>; @@ -2747,7 +2747,7 @@ def DeclareScalarRegInst : class MoveParamInst : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), !strconcat("mov", asmstr, " \t$dst, $src;"), - [(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>; + [(set T:$dst, (MoveParam T:$src))]>; class MoveParamSymbolInst : @@ -2782,7 +2782,7 @@ def PseudoUseParamF32 : 
PseudoUseParamInst; class ProxyRegInst : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), !strconcat("mov.", SzStr, " \t$dst, $src;"), - [(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>; + [(set T:$dst, (ProxyReg T:$src))]>; def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>; def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; @@ -3090,7 +3090,7 @@ class F_BITCONVERT.ret> : NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), !strconcat("mov.b", SzStr, " \t$d, $a;"), - [(set (TOut regclassOut:$d), (bitconvert (TIn regclassIn:$a)))]>; + [(set TOut:$d, (bitconvert TIn:$a))]>; def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>; def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>; @@ -3100,15 +3100,15 @@ def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>; foreach vt = [v2f16, v2bf16, v2i16, v4i8] in { def: Pat<(vt (bitconvert (f32 Float32Regs:$a))), (BITCONVERT_32_F2I Float32Regs:$a)>; -def: Pat<(f32 (bitconvert (vt Int32Regs:$a))), +def: Pat<(f32 (bitconvert vt:$a)), (BITCONVERT_32_I2F Int32Regs:$a)>; } foreach vt = [f16, bf16] in { def: Pat<(vt (bitconvert (i16 UInt16Const:$a))), (IMOVB16ri UInt16Const:$a)>; -def: Pat<(vt (bitconvert (i16 Int16Regs:$a))), +def: Pat<(vt (bitconvert i16:$a)), (ProxyRegI16 Int16Regs:$a)>; -def: Pat<(i16 (bitconvert (vt Int16Regs:$a))), +def: Pat<(i16 (bitconvert vt:$a)), (ProxyRegI16 Int16Regs:$a)>; } @@ -3129,279 +3129,279 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in { // and then cvt to floating-point. 
// sint -> f16 -def : Pat<(f16 (sint_to_fp Int1Regs:$a)), +def : Pat<(f16 (sint_to_fp i1:$a)), (CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>; def : Pat<(f16 (sint_to_fp Int16Regs:$a)), - (CVT_f16_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f16 (sint_to_fp Int32Regs:$a)), - (CVT_f16_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f16 (sint_to_fp Int64Regs:$a)), - (CVT_f16_s64 Int64Regs:$a, CvtRN)>; + (CVT_f16_s16 i16:$a, CvtRN)>; +def : Pat<(f16 (sint_to_fp i32:$a)), + (CVT_f16_s32 i32:$a, CvtRN)>; +def : Pat<(f16 (sint_to_fp i64:$a)), + (CVT_f16_s64 i64:$a, CvtRN)>; // uint -> f16 -def : Pat<(f16 (uint_to_fp Int1Regs:$a)), +def : Pat<(f16 (uint_to_fp i1:$a)), (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; def : Pat<(f16 (uint_to_fp Int16Regs:$a)), - (CVT_f16_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f16 (uint_to_fp Int32Regs:$a)), - (CVT_f16_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f16 (uint_to_fp Int64Regs:$a)), - (CVT_f16_u64 Int64Regs:$a, CvtRN)>; + (CVT_f16_u16 i16:$a, CvtRN)>; +def : Pat<(f16 (uint_to_fp i32:$a)), + (CVT_f16_u32 i32:$a, CvtRN)>; +def : Pat<(f16 (uint_to_fp i64:$a)), + (CVT_f16_u64 i64:$a, CvtRN)>; // sint -> bf16 -def : Pat<(bf16 (sint_to_fp Int1Regs:$a)), +def : Pat<(bf16 (sint_to_fp i1:$a)), (CVT_bf16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (sint_to_fp Int16Regs:$a)), - (CVT_bf16_s16 Int16Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (sint_to_fp Int32Regs:$a)), - (CVT_bf16_s32 Int32Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (sint_to_fp Int64Regs:$a)), - (CVT_bf16_s64 Int64Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (sint_to_fp i16:$a)), + (CVT_bf16_s16 i16:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (sint_to_fp i32:$a)), + (CVT_bf16_s32 i32:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (sint_to_fp i64:$a)), + (CVT_bf16_s64 i64:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; 
// uint -> bf16 -def : Pat<(bf16 (uint_to_fp Int1Regs:$a)), +def : Pat<(bf16 (uint_to_fp i1:$a)), (CVT_bf16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (uint_to_fp Int16Regs:$a)), - (CVT_bf16_u16 Int16Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (uint_to_fp Int32Regs:$a)), - (CVT_bf16_u32 Int32Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; -def : Pat<(bf16 (uint_to_fp Int64Regs:$a)), - (CVT_bf16_u64 Int64Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (uint_to_fp i16:$a)), + (CVT_bf16_u16 i16:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (uint_to_fp i32:$a)), + (CVT_bf16_u32 i32:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; +def : Pat<(bf16 (uint_to_fp i64:$a)), + (CVT_bf16_u64 i64:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; // sint -> f32 -def : Pat<(f32 (sint_to_fp Int1Regs:$a)), +def : Pat<(f32 (sint_to_fp i1:$a)), (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f32 (sint_to_fp Int16Regs:$a)), - (CVT_f32_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f32 (sint_to_fp Int32Regs:$a)), - (CVT_f32_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f32 (sint_to_fp Int64Regs:$a)), - (CVT_f32_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp i16:$a)), + (CVT_f32_s16 i16:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp i32:$a)), + (CVT_f32_s32 i32:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp i64:$a)), + (CVT_f32_s64 i64:$a, CvtRN)>; // uint -> f32 -def : Pat<(f32 (uint_to_fp Int1Regs:$a)), +def : Pat<(f32 (uint_to_fp i1:$a)), (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f32 (uint_to_fp Int16Regs:$a)), +def : Pat<(f32 (uint_to_fp i16:$a)), (CVT_f32_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f32 (uint_to_fp Int32Regs:$a)), - (CVT_f32_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f32 (uint_to_fp Int64Regs:$a)), - (CVT_f32_u64 Int64Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp i32:$a)), + (CVT_f32_u32 i32:$a, CvtRN)>; +def : Pat<(f32 
(uint_to_fp i64:$a)), + (CVT_f32_u64 i64:$a, CvtRN)>; // sint -> f64 -def : Pat<(f64 (sint_to_fp Int1Regs:$a)), +def : Pat<(f64 (sint_to_fp i1:$a)), (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f64 (sint_to_fp Int16Regs:$a)), +def : Pat<(f64 (sint_to_fp i16:$a)), (CVT_f64_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f64 (sint_to_fp Int32Regs:$a)), - (CVT_f64_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f64 (sint_to_fp Int64Regs:$a)), - (CVT_f64_s64 Int64Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp i32:$a)), + (CVT_f64_s32 i32:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp i64:$a)), + (CVT_f64_s64 i64:$a, CvtRN)>; // uint -> f64 -def : Pat<(f64 (uint_to_fp Int1Regs:$a)), +def : Pat<(f64 (uint_to_fp i1:$a)), (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f64 (uint_to_fp Int16Regs:$a)), +def : Pat<(f64 (uint_to_fp i16:$a)), (CVT_f64_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f64 (uint_to_fp Int32Regs:$a)), - (CVT_f64_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f64 (uint_to_fp Int64Regs:$a)), - (CVT_f64_u64 Int64Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp i32:$a)), + (CVT_f64_u32 i32:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp i64:$a)), + (CVT_f64_u64 i64:$a, CvtRN)>; // f16 -> sint -def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))), +def : Pat<(i1 (fp_to_sint f16:$a)), (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))), - (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>; -def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))), - (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>; -def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))), +def : Pat<(i16 (fp_to_sint f16:$a)), + (CVT_s16_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint f16:$a)), + (CVT_s32_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint f16:$a)), (CVT_s64_f16 Int16Regs:$a, CvtRZI)>; // f16 -> uint -def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))), +def : Pat<(i1 (fp_to_uint f16:$a)), (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))), +def 
: Pat<(i16 (fp_to_uint f16:$a)), (CVT_u16_f16 Int16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))), +def : Pat<(i32 (fp_to_uint f16:$a)), (CVT_u32_f16 Int16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))), +def : Pat<(i64 (fp_to_uint f16:$a)), (CVT_u64_f16 Int16Regs:$a, CvtRZI)>; // bf16 -> sint -def : Pat<(i1 (fp_to_sint (bf16 Int16Regs:$a))), +def : Pat<(i1 (fp_to_sint bf16:$a)), (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint (bf16 Int16Regs:$a))), - (CVT_s16_bf16 (bf16 Int16Regs:$a), CvtRZI)>; -def : Pat<(i32 (fp_to_sint (bf16 Int16Regs:$a))), - (CVT_s32_bf16 (bf16 Int16Regs:$a), CvtRZI)>; -def : Pat<(i64 (fp_to_sint (bf16 Int16Regs:$a))), +def : Pat<(i16 (fp_to_sint bf16:$a)), + (CVT_s16_bf16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint bf16:$a)), + (CVT_s32_bf16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint bf16:$a)), (CVT_s64_bf16 Int16Regs:$a, CvtRZI)>; // bf16 -> uint -def : Pat<(i1 (fp_to_uint (bf16 Int16Regs:$a))), +def : Pat<(i1 (fp_to_uint bf16:$a)), (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint (bf16 Int16Regs:$a))), +def : Pat<(i16 (fp_to_uint bf16:$a)), (CVT_u16_bf16 Int16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint (bf16 Int16Regs:$a))), +def : Pat<(i32 (fp_to_uint bf16:$a)), (CVT_u32_bf16 Int16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint (bf16 Int16Regs:$a))), +def : Pat<(i64 (fp_to_uint bf16:$a)), (CVT_u64_bf16 Int16Regs:$a, CvtRZI)>; // f32 -> sint -def : Pat<(i1 (fp_to_sint Float32Regs:$a)), +def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint Float32Regs:$a)), +def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_sint Float32Regs:$a)), +def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_sint Float32Regs:$a)), +def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 
Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_sint Float32Regs:$a)), +def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_sint Float32Regs:$a)), +def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_sint Float32Regs:$a)), +def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; // f32 -> uint -def : Pat<(i1 (fp_to_uint Float32Regs:$a)), +def : Pat<(i1 (fp_to_uint f32:$a)), (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint Float32Regs:$a)), +def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_uint Float32Regs:$a)), +def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint Float32Regs:$a)), +def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_uint Float32Regs:$a)), +def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint Float32Regs:$a)), +def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_uint Float32Regs:$a)), +def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; // f64 -> sint -def : Pat<(i1 (fp_to_sint Float64Regs:$a)), +def : Pat<(i1 (fp_to_sint f64:$a)), (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint Float64Regs:$a)), +def : Pat<(i16 (fp_to_sint f64:$a)), (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_sint Float64Regs:$a)), +def : Pat<(i32 (fp_to_sint f64:$a)), (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_sint Float64Regs:$a)), +def : Pat<(i64 (fp_to_sint f64:$a)), (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; // f64 -> uint -def : Pat<(i1 (fp_to_uint 
Float64Regs:$a)), +def : Pat<(i1 (fp_to_uint f64:$a)), (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint Float64Regs:$a)), +def : Pat<(i16 (fp_to_uint f64:$a)), (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint Float64Regs:$a)), +def : Pat<(i32 (fp_to_uint f64:$a)), (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint Float64Regs:$a)), +def : Pat<(i64 (fp_to_uint f64:$a)), (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; // sext i1 -def : Pat<(i16 (sext Int1Regs:$a)), +def : Pat<(i16 (sext i1:$a)), (SELP_s16ii -1, 0, Int1Regs:$a)>; -def : Pat<(i32 (sext Int1Regs:$a)), +def : Pat<(i32 (sext i1:$a)), (SELP_s32ii -1, 0, Int1Regs:$a)>; -def : Pat<(i64 (sext Int1Regs:$a)), +def : Pat<(i64 (sext i1:$a)), (SELP_s64ii -1, 0, Int1Regs:$a)>; // zext i1 -def : Pat<(i16 (zext Int1Regs:$a)), +def : Pat<(i16 (zext i1:$a)), (SELP_u16ii 1, 0, Int1Regs:$a)>; -def : Pat<(i32 (zext Int1Regs:$a)), +def : Pat<(i32 (zext i1:$a)), (SELP_u32ii 1, 0, Int1Regs:$a)>; -def : Pat<(i64 (zext Int1Regs:$a)), +def : Pat<(i64 (zext i1:$a)), (SELP_u64ii 1, 0, Int1Regs:$a)>; // anyext i1 -def : Pat<(i16 (anyext Int1Regs:$a)), +def : Pat<(i16 (anyext i1:$a)), (SELP_u16ii -1, 0, Int1Regs:$a)>; -def : Pat<(i32 (anyext Int1Regs:$a)), +def : Pat<(i32 (anyext i1:$a)), (SELP_u32ii -1, 0, Int1Regs:$a)>; -def : Pat<(i64 (anyext Int1Regs:$a)), +def : Pat<(i64 (anyext i1:$a)), (SELP_u64ii -1, 0, Int1Regs:$a)>; // sext i16 -def : Pat<(i32 (sext Int16Regs:$a)), +def : Pat<(i32 (sext i16:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (sext Int16Regs:$a)), +def : Pat<(i64 (sext i16:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; // zext i16 -def : Pat<(i32 (zext Int16Regs:$a)), +def : Pat<(i32 (zext i16:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (zext Int16Regs:$a)), +def : Pat<(i64 (zext i16:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; // anyext i16 -def : Pat<(i32 (anyext Int16Regs:$a)), +def : Pat<(i32 (anyext i16:$a)), 
(CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (anyext Int16Regs:$a)), +def : Pat<(i64 (anyext i16:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; // sext i32 -def : Pat<(i64 (sext Int32Regs:$a)), +def : Pat<(i64 (sext i32:$a)), (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; // zext i32 -def : Pat<(i64 (zext Int32Regs:$a)), +def : Pat<(i64 (zext i32:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; // anyext i32 -def : Pat<(i64 (anyext Int32Regs:$a)), +def : Pat<(i64 (anyext i32:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; // truncate i64 -def : Pat<(i32 (trunc Int64Regs:$a)), +def : Pat<(i32 (trunc i64:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; -def : Pat<(i16 (trunc Int64Regs:$a)), +def : Pat<(i16 (trunc i64:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; -def : Pat<(i1 (trunc Int64Regs:$a)), +def : Pat<(i1 (trunc i64:$a)), (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; // truncate i32 -def : Pat<(i16 (trunc Int32Regs:$a)), +def : Pat<(i16 (trunc i32:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; -def : Pat<(i1 (trunc Int32Regs:$a)), +def : Pat<(i1 (trunc i32:$a)), (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; // truncate i16 -def : Pat<(i1 (trunc Int16Regs:$a)), +def : Pat<(i1 (trunc i16:$a)), (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; // sext_inreg -def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; -def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; -def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; +def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; +def : Pat<(sext_inreg i32:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; +def : Pat<(sext_inreg i32:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; +def : Pat<(sext_inreg i64:$a, i8), 
(CVT_INREG_s64_s8 Int64Regs:$a)>; +def : Pat<(sext_inreg i64:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; +def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; // Select instructions with 32-bit predicates -def : Pat<(select (i32 Int32Regs:$pred), i16:$a, i16:$b), +def : Pat<(select i32:$pred, i16:$a, i16:$b), (SELP_b16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), i32:$a, i32:$b), +def : Pat<(select i32:$pred, i32:$a, i32:$b), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), Int64Regs:$a, Int64Regs:$b), +def : Pat<(select i32:$pred, i64:$a, i64:$b), (SELP_b64rr Int64Regs:$a, Int64Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), (f16 Int16Regs:$a), (f16 Int16Regs:$b)), +def : Pat<(select i32:$pred, f16:$a, f16:$b), (SELP_f16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)), +def : Pat<(select i32:$pred, bf16:$a, bf16:$b), (SELP_bf16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), Float32Regs:$a, Float32Regs:$b), +def : Pat<(select i32:$pred, f32:$a, f32:$b), (SELP_f32rr Float32Regs:$a, Float32Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select (i32 Int32Regs:$pred), Float64Regs:$a, Float64Regs:$b), +def : Pat<(select i32:$pred, f64:$a, f64:$b), (SELP_f64rr Float64Regs:$a, Float64Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; @@ -3464,32 +3464,32 @@ let hasSideEffects = false in { // Using partial vectorized move produces better SASS code for extraction of // upper/lower parts of an integer. 
-def : Pat<(i16 (trunc (srl Int32Regs:$s, (i32 16)))), +def : Pat<(i16 (trunc (srl i32:$s, (i32 16)))), (I32toI16H Int32Regs:$s)>; -def : Pat<(i16 (trunc (sra Int32Regs:$s, (i32 16)))), +def : Pat<(i16 (trunc (sra i32:$s, (i32 16)))), (I32toI16H Int32Regs:$s)>; -def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))), +def : Pat<(i32 (trunc (srl i64:$s, (i32 32)))), (I64toI32H Int64Regs:$s)>; -def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))), +def : Pat<(i32 (trunc (sra i64:$s, (i32 32)))), (I64toI32H Int64Regs:$s)>; -def: Pat<(i32 (sext (extractelt (v2i16 Int32Regs:$src), 0))), +def: Pat<(i32 (sext (extractelt v2i16:$src, 0))), (CVT_INREG_s32_s16 Int32Regs:$src)>; foreach vt = [v2f16, v2bf16, v2i16] in { -def : Pat<(extractelt (vt Int32Regs:$src), 0), +def : Pat<(extractelt vt:$src, 0), (I32toI16L Int32Regs:$src)>; -def : Pat<(extractelt (vt Int32Regs:$src), 1), +def : Pat<(extractelt vt:$src, 1), (I32toI16H Int32Regs:$src)>; } -def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))), +def : Pat<(v2f16 (build_vector f16:$a, f16:$b)), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; -def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))), +def : Pat<(v2bf16 (build_vector bf16:$a, bf16:$b)), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; -def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))), +def : Pat<(v2i16 (build_vector i16:$a, i16:$b)), (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>; -def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))), +def: Pat<(v2i16 (scalar_to_vector i16:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; // @@ -3509,16 +3509,16 @@ let hasSideEffects = false in { : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), "shf." 
# mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 imm:$amt)))]>, + [(set i32:$dst, + (op i32:$hi, i32:$lo, (i32 imm:$amt)))]>, Requires<[hasHWROT32]>; def _r : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), "shf." # mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 Int32Regs:$amt)))]>, + [(set i32:$dst, + (op i32:$hi, i32:$lo, i32:$amt))]>, Requires<[hasHWROT32]>; } @@ -3528,14 +3528,14 @@ let hasSideEffects = false in { defm SHF_R_WRAP : ShfInst<"r.wrap", fshr>; } -def : Pat<(i32 (int_nvvm_fshl_clamp (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 Int32Regs:$amt))), - (SHF_L_CLAMP_r (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt))>; -def : Pat<(i32 (int_nvvm_fshl_clamp (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 imm:$amt))), - (SHF_L_CLAMP_i (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 imm:$amt))>; -def : Pat<(i32 (int_nvvm_fshr_clamp (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 Int32Regs:$amt))), - (SHF_R_CLAMP_r (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt))>; -def : Pat<(i32 (int_nvvm_fshr_clamp (i32 Int32Regs:$hi), (i32 Int32Regs:$lo), (i32 imm:$amt))), - (SHF_R_CLAMP_i (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 imm:$amt))>; +def : Pat<(i32 (int_nvvm_fshl_clamp i32:$hi, i32:$lo, i32:$amt)), + (SHF_L_CLAMP_r Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt)>; +def : Pat<(i32 (int_nvvm_fshl_clamp i32:$hi, i32:$lo, (i32 imm:$amt))), + (SHF_L_CLAMP_i Int32Regs:$lo, Int32Regs:$hi, imm:$amt)>; +def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, i32:$amt)), + (SHF_R_CLAMP_r Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt)>; +def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, (i32 imm:$amt))), + (SHF_R_CLAMP_i Int32Regs:$lo, Int32Regs:$hi, imm:$amt)>; // Count leading zeros let hasSideEffects = false in { @@ -3546,14 +3546,14 @@ let hasSideEffects = false in 
{ } // 32-bit has a direct PTX instruction -def : Pat<(i32 (ctlz (i32 Int32Regs:$a))), (CLZr32 Int32Regs:$a)>; +def : Pat<(i32 (ctlz i32:$a)), (CLZr32 i32:$a)>; // The return type of the ctlz ISD node is the same as its input, but the PTX // ctz instruction always returns a 32-bit value. For ctlz.i64, convert the // ptx value to 64 bits to match the ISD node's semantics, unless we know we're // truncating back down to 32 bits. -def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; -def : Pat<(i32 (trunc (i64 (ctlz Int64Regs:$a)))), (CLZr64 Int64Regs:$a)>; +def : Pat<(i64 (ctlz i64:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i32 (trunc (i64 (ctlz i64:$a)))), (CLZr64 Int64Regs:$a)>; // For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the // result back to 16-bits if necessary. We also need to subtract 16 because @@ -3569,10 +3569,10 @@ def : Pat<(i32 (trunc (i64 (ctlz Int64Regs:$a)))), (CLZr64 Int64Regs:$a)>; // and then ctlz that value. This way we don't have to subtract 16 from the // result. Unfortunately today we don't have a way to generate // "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. -def : Pat<(i16 (ctlz Int16Regs:$a)), +def : Pat<(i16 (ctlz i16:$a)), (SUBi16ri (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; -def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))), +def : Pat<(i32 (zext (i16 (ctlz i16:$a)))), (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; // Population count @@ -3584,66 +3584,66 @@ let hasSideEffects = false in { } // 32-bit has a direct PTX instruction -def : Pat<(i32 (ctpop (i32 Int32Regs:$a))), (POPCr32 Int32Regs:$a)>; +def : Pat<(i32 (ctpop i32:$a)), (POPCr32 Int32Regs:$a)>; // For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit // to match the LLVM semantics. 
Just as with ctlz.i64, we provide a second // pattern that avoids the type conversion if we're truncating the result to // i32 anyway. -def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; -def : Pat<(i32 (trunc (i64 (ctpop Int64Regs:$a)))), (POPCr64 Int64Regs:$a)>; +def : Pat<(ctpop i64:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i32 (trunc (i64 (ctpop i64:$a)))), (POPCr64 Int64Regs:$a)>; // For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits. // If we know that we're storing into an i32, we can avoid the final trunc. -def : Pat<(ctpop Int16Regs:$a), +def : Pat<(ctpop i16:$a), (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; -def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))), +def : Pat<(i32 (zext (i16 (ctpop i16:$a)))), (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; // fpround f32 -> f16 -def : Pat<(f16 (fpround Float32Regs:$a)), +def : Pat<(f16 (fpround f32:$a)), (CVT_f16_f32 Float32Regs:$a, CvtRN)>; // fpround f32 -> bf16 -def : Pat<(bf16 (fpround Float32Regs:$a)), +def : Pat<(bf16 (fpround f32:$a)), (CVT_bf16_f32 Float32Regs:$a, CvtRN)>, Requires<[hasPTX<70>, hasSM<80>]>; // fpround f64 -> f16 -def : Pat<(f16 (fpround Float64Regs:$a)), +def : Pat<(f16 (fpround f64:$a)), (CVT_f16_f64 Float64Regs:$a, CvtRN)>; // fpround f64 -> bf16 -def : Pat<(bf16 (fpround Float64Regs:$a)), +def : Pat<(bf16 (fpround f64:$a)), (CVT_bf16_f64 Float64Regs:$a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>; // fpround f64 -> f32 -def : Pat<(f32 (fpround Float64Regs:$a)), +def : Pat<(f32 (fpround f64:$a)), (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpround Float64Regs:$a)), +def : Pat<(f32 (fpround f64:$a)), (CVT_f32_f64 Float64Regs:$a, CvtRN)>; // fpextend f16 -> f32 -def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), +def : Pat<(f32 (fpextend f16:$a)), (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend (f16 
Int16Regs:$a))), +def : Pat<(f32 (fpextend f16:$a)), (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; // fpextend bf16 -> f32 -def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))), +def : Pat<(f32 (fpextend bf16:$a)), (CVT_f32_bf16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))), +def : Pat<(f32 (fpextend bf16:$a)), (CVT_f32_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<71>, hasSM<80>]>; // fpextend f16 -> f64 -def : Pat<(f64 (fpextend (f16 Int16Regs:$a))), +def : Pat<(f64 (fpextend f16:$a)), (CVT_f64_f16 Int16Regs:$a, CvtNONE)>; // fpextend bf16 -> f64 -def : Pat<(f64 (fpextend (bf16 Int16Regs:$a))), +def : Pat<(f64 (fpextend bf16:$a)), (CVT_f64_bf16 Int16Regs:$a, CvtNONE)>, Requires<[hasPTX<78>, hasSM<90>]>; // fpextend f32 -> f64 -def : Pat<(f64 (fpextend Float32Regs:$a)), +def : Pat<(f64 (fpextend f32:$a)), (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f64 (fpextend Float32Regs:$a)), +def : Pat<(f64 (fpextend f32:$a)), (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone, @@ -3652,15 +3652,15 @@ def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone, // fceil, ffloor, froundeven, ftrunc. 
multiclass CVT_ROUND { - def : Pat<(OpNode (f16 Int16Regs:$a)), + def : Pat<(OpNode f16:$a), (CVT_f16_f16 Int16Regs:$a, Mode)>; - def : Pat<(OpNode (bf16 Int16Regs:$a)), + def : Pat<(OpNode bf16:$a), (CVT_bf16_bf16 Int16Regs:$a, Mode)>; - def : Pat<(OpNode Float32Regs:$a), + def : Pat<(OpNode f32:$a), (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>; - def : Pat<(OpNode Float32Regs:$a), + def : Pat<(OpNode f32:$a), (CVT_f32_f32 Float32Regs:$a, Mode)>, Requires<[doNoF32FTZ]>; - def : Pat<(OpNode Float64Regs:$a), + def : Pat<(OpNode f64:$a), (CVT_f64_f64 Float64Regs:$a, Mode)>; } @@ -3687,7 +3687,7 @@ let isTerminator=1 in { let isBranch=1 in def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), "@$a bra \t$target;", - [(brcond Int1Regs:$a, bb:$target)]>; + [(brcond i1:$a, bb:$target)]>; let isBranch=1 in def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), "@!$a bra \t$target;", []>; @@ -3697,7 +3697,7 @@ let isTerminator=1 in { "bra.uni \t$target;", [(br bb:$target)]>; } -def : Pat<(brcond (i32 Int32Regs:$a), bb:$target), +def : Pat<(brcond i32:$a, bb:$target), (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a @@ -3705,8 +3705,8 @@ def : Pat<(brcond (i32 Int32Regs:$a), bb:$target), // can fall through to the target block. The invertion is done by 'xor // condition, 1', which will be translated to (setne condition, -1). Since ptx // supports '@!pred bra target', we should use it. 
-def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), - (CBranchOther Int1Regs:$a, bb:$target)>; +def : Pat<(brcond (i1 (setne i1:$a, -1)), bb:$target), + (CBranchOther i1:$a, bb:$target)>; // Call def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, @@ -3775,7 +3775,7 @@ def DYNAMIC_STACKALLOC32 : (ins Int32Regs:$size, i32imm:$align), "alloca.u32 \t$ptr, $size, $align;\n\t" "cvta.local.u32 \t$ptr, $ptr;", - [(set (i32 Int32Regs:$ptr), (dyn_alloca Int32Regs:$size, (i32 timm:$align)))]>, + [(set i32:$ptr, (dyn_alloca i32:$size, (i32 timm:$align)))]>, Requires<[hasPTX<73>, hasSM<52>]>; def DYNAMIC_STACKALLOC64 : @@ -3783,7 +3783,7 @@ def DYNAMIC_STACKALLOC64 : (ins Int64Regs:$size, i32imm:$align), "alloca.u64 \t$ptr, $size, $align;\n\t" "cvta.local.u64 \t$ptr, $ptr;", - [(set Int64Regs:$ptr, (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>, + [(set i64:$ptr, (dyn_alloca i64:$size, (i32 timm:$align)))]>, Requires<[hasPTX<73>, hasSM<52>]>; @@ -3820,7 +3820,7 @@ let isTerminator = 1, isBranch = 1, isIndirectBranch = 1, isNotDuplicable = 1 in def BRX_END : NVPTXInst<(outs), (ins brtarget:$target, Int32Regs:$val, i32imm:$id), "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;", - [(brx_end bb:$target, (i32 Int32Regs:$val), (i32 imm:$id))]> { + [(brx_end bb:$target, i32:$val, (i32 imm:$id))]> { let isBarrier = 1; } } @@ -3833,9 +3833,9 @@ foreach a_type = ["s", "u"] in { NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), "dp4a." # a_type # "32." # b_type # "32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, + [(set i32:$dst, (!cast("int_nvvm_idp4a_" # a_type # "_" # b_type) - (i32 Int32Regs:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>, + i32:$a, i32:$b, i32:$c))]>, Requires<[hasDotInstructions]>; foreach is_hi = [0, -1] in { @@ -3845,9 +3845,9 @@ foreach a_type = ["s", "u"] in { NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), "dp2a." # lohi_suffix # "." # a_type # "32." 
# b_type # "32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, + [(set i32:$dst, (!cast("int_nvvm_idp2a_" # a_type # "_" # b_type) - (i32 Int32Regs:$a), (i32 Int32Regs:$b), is_hi, (i32 Int32Regs:$c)))]>, + i32:$a, i32:$b, is_hi, i32:$c))]>, Requires<[hasDotInstructions]>; } } @@ -3870,25 +3870,25 @@ def stacksave : def STACKRESTORE_32 : NVPTXInst<(outs), (ins Int32Regs:$ptr), "stackrestore.u32 \t$ptr;", - [(stackrestore (i32 Int32Regs:$ptr))]>, + [(stackrestore i32:$ptr)]>, Requires<[hasPTX<73>, hasSM<52>]>; def STACKSAVE_32 : NVPTXInst<(outs Int32Regs:$dst), (ins), "stacksave.u32 \t$dst;", - [(set Int32Regs:$dst, (i32 stacksave))]>, + [(set i32:$dst, (i32 stacksave))]>, Requires<[hasPTX<73>, hasSM<52>]>; def STACKRESTORE_64 : NVPTXInst<(outs), (ins Int64Regs:$ptr), "stackrestore.u64 \t$ptr;", - [(stackrestore (i64 Int64Regs:$ptr))]>, + [(stackrestore i64:$ptr)]>, Requires<[hasPTX<73>, hasSM<52>]>; def STACKSAVE_64 : NVPTXInst<(outs Int64Regs:$dst), (ins), "stacksave.u64 \t$dst;", - [(set Int64Regs:$dst, (i64 stacksave))]>, + [(set i64:$dst, (i64 stacksave))]>, Requires<[hasPTX<73>, hasSM<52>]>; include "NVPTXIntrinsics.td" @@ -3983,19 +3983,19 @@ def FMARELU_F16X2_FTZ : NVPTXInst_rrr, hasSM<80>]>; // FTZ -def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), fpimm_any_zero)), +def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)), (FMARELU_F16_FTZ Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>, Requires<[doF32FTZ]>; -def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), fpimm_positive_zero_v2f16)), +def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)), (FMARELU_F16X2_FTZ Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>, Requires<[doF32FTZ]>; // NO FTZ -def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), 
fpimm_any_zero)), +def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)), (FMARELU_F16 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>; -def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), fpimm_any_zero)), +def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan bf16:$a, bf16:$b, bf16:$c), fpimm_any_zero)), (FMARELU_BF16 Int16Regs:$a, Int16Regs:$b, Int16Regs:$c)>; -def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), fpimm_positive_zero_v2f16)), +def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)), (FMARELU_F16X2 Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>; -def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), fpimm_positive_zero_v2bf16)), +def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2bf16:$a, v2bf16:$b, v2bf16:$c), fpimm_positive_zero_v2bf16)), (FMARELU_BF16X2 Int32Regs:$a, Int32Regs:$b, Int32Regs:$c)>; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 8364b658495c7..256161d5d79c7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -82,17 +82,17 @@ def INT_BARRIER0 : NVPTXInst<(outs), (ins), [(int_nvvm_barrier0)]>; def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1), "bar.sync \t$src1;", - [(int_nvvm_barrier_n Int32Regs:$src1)]>; + [(int_nvvm_barrier_n i32:$src1)]>; def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2), "bar.sync \t$src1, $src2;", - [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>; + [(int_nvvm_barrier i32:$src1, i32:$src2)]>; def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", "setp.ne.u32 \t%p1, $pred, 0; \n\t", "bar.red.popc.u32 \t$dst, 
0, %p1; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>; def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", @@ -101,7 +101,7 @@ def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), "bar.red.and.pred \t%p2, 0, %p1; \n\t", "selp.u32 \t$dst, 1, 0, %p2; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>; def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), !strconcat("{{ \n\t", ".reg .pred \t%p1; \n\t", @@ -110,7 +110,7 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), "bar.red.or.pred \t%p2, 0, %p1; \n\t", "selp.u32 \t$dst, 1, 0, %p2; \n\t", "}}"), - [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>; + [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>; def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;", [(int_nvvm_bar_sync imm:$i)]>; @@ -119,27 +119,27 @@ def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i [(int_nvvm_bar_warp_sync imm:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;", - [(int_nvvm_bar_warp_sync Int32Regs:$i)]>, + [(int_nvvm_bar_warp_sync i32:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;", [(int_nvvm_barrier_sync imm:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;", - [(int_nvvm_barrier_sync Int32Regs:$i)]>, + [(int_nvvm_barrier_sync i32:$i)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt 
Int32Regs:$id, Int32Regs:$cnt)]>, + [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>, + [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt), "barrier.sync \t$id, $cnt;", - [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>, + [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), "barrier.sync \t$id, $cnt;", @@ -230,7 +230,7 @@ foreach sync = [false, true] in { multiclass VOTE { def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), "vote." # mode # " \t$dest, $pred;", - [(set regclass:$dest, (IntOp Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } @@ -243,11 +243,11 @@ defm VOTE_BALLOT : VOTE; multiclass VOTE_SYNC { def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred), "vote.sync." # mode # " \t$dest, $pred, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred), "vote.sync." 
# mode #" \t$dest, $pred, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>, + [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>, Requires<[hasPTX<60>, hasSM<30>]>; } @@ -259,37 +259,37 @@ defm VOTE_SYNC_BALLOT : VOTE_SYNC, + [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>, Requires<[hasPTX<80>, hasSM<90>]>; def INT_ELECT_SYNC_R : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask), "elect.sync \t$dest|$pred, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (int_nvvm_elect_sync Int32Regs:$mask))]>, + [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>, Requires<[hasPTX<80>, hasSM<90>]>; multiclass MATCH_ANY_SYNC { def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>, + [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>, + [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value), "match.any.sync." 
# ptxtype # " \t$dest, $value, $mask;", - [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } // activemask.b32 def ACTIVEMASK : NVPTXInst<(outs Int32Regs:$dest), (ins), "activemask.b32 \t$dest;", - [(set Int32Regs:$dest, (int_nvvm_activemask))]>, + [(set i32:$dest, (int_nvvm_activemask))]>, Requires<[hasPTX<62>, hasSM<30>]>; defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC, + [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>, Requires<[hasPTX<60>, hasSM<70>]>; } defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC { def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask), "redux.sync." # BinOp # "." 
# PTXType # " $dst, $src, $mask;", - [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>, + [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -384,7 +384,7 @@ def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS: class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE : NVPTXInst<(outs), (ins Int64Regs:$addr), "fence.proxy.tensormap::generic.acquire." # Scope # " [$addr], 128;", - [(Intr Int64Regs:$addr, (i32 128))]>, + [(Intr i64:$addr, (i32 128))]>, Requires<[hasPTX<83>, hasSM<90>]>; def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CTA : @@ -407,11 +407,11 @@ def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_SYS : multiclass CP_ASYNC_MBARRIER_ARRIVE { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), - [(Intrin Int32Regs:$addr)]>, + [(Intrin i32:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), - [(Intrin Int64Regs:$addr)]>, + [(Intrin i64:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -427,28 +427,28 @@ defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED : multiclass CP_ASYNC_SHARED_GLOBAL_I { def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), - [(Intrin Int32Regs:$dst, Int32Regs:$src)]>, + [(Intrin i32:$dst, i32:$src)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), - [(Intrin Int64Regs:$dst, Int64Regs:$src)]>, + [(Intrin i64:$dst, i64:$src)]>, Requires<[hasPTX<70>, hasSM<80>]>; // Variant with src_size parameter def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int32Regs:$dst, 
Int32Regs:$src, Int32Regs:$src_size)]>, + [(IntrinS i32:$dst, i32:$src, i32:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>, + [(IntrinS i32:$dst, i32:$src, imm:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>, + [(IntrinS i64:$dst, i64:$src, i32:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size), !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), - [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>, + [(IntrinS i64:$dst, i64:$src, imm:$src_size)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -474,7 +474,7 @@ def CP_ASYNC_COMMIT_GROUP : def CP_ASYNC_WAIT_GROUP : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;", - [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>, + [(int_nvvm_cp_async_wait_group timm:$n)]>, Requires<[hasPTX<70>, hasSM<80>]>; def CP_ASYNC_WAIT_ALL : @@ -490,12 +490,12 @@ def CP_ASYNC_BULK_COMMIT_GROUP : def CP_ASYNC_BULK_WAIT_GROUP : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;", - [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>, + [(int_nvvm_cp_async_bulk_wait_group timm:$n)]>, Requires<[hasPTX<80>, hasSM<90>]>; def CP_ASYNC_BULK_WAIT_GROUP_READ : NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;", - [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>, + [(int_nvvm_cp_async_bulk_wait_group_read timm:$n)]>, Requires<[hasPTX<80>, hasSM<90>]>; //----------------------------------- @@ -686,11 +686,11 @@ 
foreach dim = [1, 2, 3, 4, 5] in { multiclass MBARRIER_INIT { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"), - [(Intrin Int32Regs:$addr, Int32Regs:$count)]>, + [(Intrin i32:$addr, i32:$count)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"), - [(Intrin Int64Regs:$addr, Int32Regs:$count)]>, + [(Intrin i64:$addr, i32:$count)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -701,11 +701,11 @@ defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared", multiclass MBARRIER_INVAL { def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"), - [(Intrin Int32Regs:$addr)]>, + [(Intrin i32:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"), - [(Intrin Int64Regs:$addr)]>, + [(Intrin i64:$addr)]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -716,11 +716,11 @@ defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared", multiclass MBARRIER_ARRIVE { def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>, + [(set i64:$state, (Intrin i32:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>, + [(set i64:$state, (Intrin i64:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -733,13 +733,13 @@ multiclass MBARRIER_ARRIVE_NOCOMPLETE { (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>, + [(set 
i64:$state, (Intrin i32:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i64:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -752,12 +752,12 @@ multiclass MBARRIER_ARRIVE_DROP { def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr), !strconcat("mbarrier.arrive_drop", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>, + [(set i64:$state, (Intrin i32:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr), !strconcat("mbarrier.arrive_drop", AddrSpace, ".b64 $state, [$addr];"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>, + [(set i64:$state, (Intrin i64:$addr))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -771,13 +771,13 @@ multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE { (ins Int32Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i32:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr, Int32Regs:$count), !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace, ".b64 $state, [$addr], $count;"), - [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>, + [(set i64:$state, (Intrin i64:$addr, i32:$count))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -790,11 +790,11 @@ defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED : multiclass MBARRIER_TEST_WAIT { def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state), !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"), - [(set 
Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>, + [(set i1:$res, (Intrin i32:$addr, i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state), !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"), - [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>, + [(set i1:$res, (Intrin i64:$addr, i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; } @@ -806,7 +806,7 @@ defm MBARRIER_TEST_WAIT_SHARED : class MBARRIER_PENDING_COUNT : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state), "mbarrier.pending_count.b64 $res, $state;", - [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>, + [(set i32:$res, (Intrin i64:$state))]>, Requires<[hasPTX<70>, hasSM<80>]>; def MBARRIER_PENDING_COUNT : @@ -823,29 +823,29 @@ def MBARRIER_PENDING_COUNT : // Same story for fmax, fmin. def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), + (int_nvvm_fmax_f immFloat0, f32:$a)), (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f immFloat1, - (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), + (int_nvvm_fmax_f f32:$a, immFloat0)), (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), + (int_nvvm_fmax_f immFloat0, f32:$a), immFloat1), (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_f - (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), + (int_nvvm_fmax_f f32:$a, immFloat0), immFloat1), (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), + (int_nvvm_fmax_d immDouble0, f64:$a)), (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d immDouble1, - (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), + (int_nvvm_fmax_d f64:$a, immDouble0)), (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), 
+ (int_nvvm_fmax_d immDouble0, f64:$a), immDouble1), (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; def : Pat<(int_nvvm_fmin_d - (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), + (int_nvvm_fmax_d f64:$a, immDouble0), immDouble1), (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; @@ -890,7 +890,7 @@ def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$ [(int_nvvm_nanosleep imm:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;", - [(int_nvvm_nanosleep Int32Regs:$i)]>, + [(int_nvvm_nanosleep i32:$i)]>, Requires<[hasPTX<63>, hasSM<70>]>; // // Min Max @@ -1124,16 +1124,16 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; -def : Pat<(int_nvvm_div_full Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr Float32Regs:$a, Float32Regs:$b)>; -def : Pat<(int_nvvm_div_full Float32Regs:$a, fpimm:$b), +def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), (FDIV32ri Float32Regs:$a, f32imm:$b)>; -def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), (FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>; -def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, fpimm:$b), +def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), (FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>; // @@ -1157,18 +1157,18 @@ def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;", // Floor Ceil // -def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), +def : Pat<(int_nvvm_floor_ftz_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_floor_f Float32Regs:$a), +def : Pat<(int_nvvm_floor_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_floor_d Float64Regs:$a), +def : Pat<(int_nvvm_floor_d f64:$a), (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; -def : 
Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), +def : Pat<(int_nvvm_ceil_ftz_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_ceil_f Float32Regs:$a), +def : Pat<(int_nvvm_ceil_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_ceil_d Float64Regs:$a), +def : Pat<(int_nvvm_ceil_d f64:$a), (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; // @@ -1192,12 +1192,12 @@ def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>; def COPYSIGN_F : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1), "copysign.f32 \t$dst, $src0, $src1;", - [(set Float32Regs:$dst, (fcopysign_nvptx Float32Regs:$src1, Float32Regs:$src0))]>; + [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>; def COPYSIGN_D : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1), "copysign.f64 \t$dst, $src0, $src1;", - [(set Float64Regs:$dst, (fcopysign_nvptx Float64Regs:$src1, Float64Regs:$src0))]>; + [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>; // // Abs, Neg bf16, bf16x2 @@ -1216,33 +1216,33 @@ def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, // Round // -def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), +def : Pat<(int_nvvm_round_ftz_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_round_f Float32Regs:$a), +def : Pat<(int_nvvm_round_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_round_d Float64Regs:$a), +def : Pat<(int_nvvm_round_d f64:$a), (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; // // Trunc // -def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), +def : Pat<(int_nvvm_trunc_ftz_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_trunc_f Float32Regs:$a), +def : Pat<(int_nvvm_trunc_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_trunc_d Float64Regs:$a), +def : Pat<(int_nvvm_trunc_d f64:$a), (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; // // Saturate // -def : Pat<(int_nvvm_saturate_ftz_f 
Float32Regs:$a), +def : Pat<(int_nvvm_saturate_ftz_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; -def : Pat<(int_nvvm_saturate_f Float32Regs:$a), +def : Pat<(int_nvvm_saturate_f f32:$a), (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; -def : Pat<(int_nvvm_saturate_d Float64Regs:$a), +def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; // @@ -1429,13 +1429,13 @@ def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>; // nvvm_sqrt intrinsic -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), +def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), +def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), +def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; -def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), +def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; // @@ -1455,24 +1455,24 @@ def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; // 1.0f / sqrt_approx -> rsqrt_approx -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)), (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, Requires<[doRsqrtOpt]>; -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)), (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doRsqrtOpt]>; // same for int_nvvm_sqrt_f when non-precision sqrt is requested -def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)), (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>; -def: 
Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)), (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>; -def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)), (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>; -def: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)), +def: Pat<(fdiv FloatConst1, (fsqrt f32:$a)), (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>; // @@ -1515,12 +1515,12 @@ foreach t = [I32RT, I64RT] in { def BFIND_ # sign # t.Size : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), "bfind." # sign # t.Size # " \t$dst, $src;", - [(set (i32 Int32Regs:$dst), (flo_intrin (t.Ty t.RC:$src), 0))]>; + [(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>; def BFIND_SHIFTAMT_ # sign # t.Size : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), "bfind.shiftamt." 
# sign # t.Size # " \t$dst, $src;", - [(set (i32 Int32Regs:$dst), (flo_intrin (t.Ty t.RC:$src), -1))]>; + [(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>; } } @@ -1528,142 +1528,142 @@ foreach t = [I32RT, I64RT] in { // Convert // -def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rn_ftz f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; -def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rn f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rz_ftz f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; -def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rz f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rm_ftz f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; -def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rm f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rp_ftz f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; -def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), +def : Pat<(int_nvvm_d2f_rp f64:$a), (CVT_f32_f64 Float64Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), +def : Pat<(int_nvvm_d2i_rn f64:$a), (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), +def : Pat<(int_nvvm_d2i_rz f64:$a), (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), +def : Pat<(int_nvvm_d2i_rm f64:$a), (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), +def : Pat<(int_nvvm_d2i_rp f64:$a), (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), +def : Pat<(int_nvvm_d2ui_rn f64:$a), (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), +def : Pat<(int_nvvm_d2ui_rz f64:$a), (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ui_rm 
Float64Regs:$a), +def : Pat<(int_nvvm_d2ui_rm f64:$a), (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), +def : Pat<(int_nvvm_d2ui_rp f64:$a), (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), +def : Pat<(int_nvvm_i2d_rn i32:$a), (CVT_f64_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), +def : Pat<(int_nvvm_i2d_rz i32:$a), (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), +def : Pat<(int_nvvm_i2d_rm i32:$a), (CVT_f64_s32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), +def : Pat<(int_nvvm_i2d_rp i32:$a), (CVT_f64_s32 Int32Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), +def : Pat<(int_nvvm_ui2d_rn i32:$a), (CVT_f64_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), +def : Pat<(int_nvvm_ui2d_rz i32:$a), (CVT_f64_u32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), +def : Pat<(int_nvvm_ui2d_rm i32:$a), (CVT_f64_u32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), +def : Pat<(int_nvvm_ui2d_rp i32:$a), (CVT_f64_u32 Int32Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rn_ftz f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rn f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rz_ftz f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rz f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rm_ftz f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rm f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rp_ftz 
f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), +def : Pat<(int_nvvm_f2i_rp f32:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rn f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rz f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rm f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), +def : Pat<(int_nvvm_f2ui_rp f32:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), +def : Pat<(int_nvvm_i2f_rn i32:$a), (CVT_f32_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), +def : Pat<(int_nvvm_i2f_rz i32:$a), (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), +def : Pat<(int_nvvm_i2f_rm i32:$a), (CVT_f32_s32 Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), +def : Pat<(int_nvvm_i2f_rp i32:$a), (CVT_f32_s32 Int32Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), +def : Pat<(int_nvvm_ui2f_rn i32:$a), (CVT_f32_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), +def : Pat<(int_nvvm_ui2f_rz i32:$a), (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), +def : Pat<(int_nvvm_ui2f_rm i32:$a), (CVT_f32_u32 
Int32Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), +def : Pat<(int_nvvm_ui2f_rp i32:$a), (CVT_f32_u32 Int32Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b), (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b), (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; -def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b), (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; -def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b), (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; -def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b), (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b), (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; -def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b), (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; -def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b), (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; -def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2bf16_rn f32:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a), +def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>; -def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a), +def : Pat<(int_nvvm_f2bf16_rz f32:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>; -def : 
Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a), +def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>; def CVT_tf32_f32 : NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a), "cvt.rna.tf32.f32 \t$dest, $a;", - [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>; + [(set i32:$dest, (int_nvvm_f2tf32_rna f32:$a))]>; def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; @@ -1681,107 +1681,107 @@ def INT_NVVM_D2I_HI : F_MATH_1< "}}"), Int32Regs, Float64Regs, int_nvvm_d2i_hi>; -def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rn f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rz f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rm f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), +def : Pat<(int_nvvm_f2ll_rp f32:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rn f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a), (CVT_u64_f32 
Float32Regs:$a, CvtRZI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rz f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rm f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; -def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), +def : Pat<(int_nvvm_f2ull_rp f32:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), +def : Pat<(int_nvvm_d2ll_rn f64:$a), (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), +def : Pat<(int_nvvm_d2ll_rz f64:$a), (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), +def : Pat<(int_nvvm_d2ll_rm f64:$a), (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), +def : Pat<(int_nvvm_d2ll_rp f64:$a), (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), +def : Pat<(int_nvvm_d2ull_rn f64:$a), (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; -def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), +def : Pat<(int_nvvm_d2ull_rz f64:$a), (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), +def : Pat<(int_nvvm_d2ull_rm f64:$a), (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; -def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), +def : Pat<(int_nvvm_d2ull_rp f64:$a), (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; -def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), +def : Pat<(int_nvvm_ll2f_rn i64:$a), (CVT_f32_s64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), +def : Pat<(int_nvvm_ll2f_rz i64:$a), (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), +def : Pat<(int_nvvm_ll2f_rm i64:$a), 
(CVT_f32_s64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), +def : Pat<(int_nvvm_ll2f_rp i64:$a), (CVT_f32_s64 Int64Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), +def : Pat<(int_nvvm_ull2f_rn i64:$a), (CVT_f32_u64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), +def : Pat<(int_nvvm_ull2f_rz i64:$a), (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), +def : Pat<(int_nvvm_ull2f_rm i64:$a), (CVT_f32_u64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), +def : Pat<(int_nvvm_ull2f_rp i64:$a), (CVT_f32_u64 Int64Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), +def : Pat<(int_nvvm_ll2d_rn i64:$a), (CVT_f64_s64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), +def : Pat<(int_nvvm_ll2d_rz i64:$a), (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), +def : Pat<(int_nvvm_ll2d_rm i64:$a), (CVT_f64_s64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), +def : Pat<(int_nvvm_ll2d_rp i64:$a), (CVT_f64_s64 Int64Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), +def : Pat<(int_nvvm_ull2d_rn i64:$a), (CVT_f64_u64 Int64Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), +def : Pat<(int_nvvm_ull2d_rz i64:$a), (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; -def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), +def : Pat<(int_nvvm_ull2d_rm i64:$a), (CVT_f64_u64 Int64Regs:$a, CvtRM)>; -def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), +def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 Int64Regs:$a, CvtRP)>; -def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), +def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>; -def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), +def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 Float32Regs:$a, CvtRN)>; -def : Pat<(int_nvvm_ff_to_e4m3x2_rn Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b), (CVT_e4m3x2_f32 Float32Regs:$a, 
Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b), (CVT_e4m3x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; -def : Pat<(int_nvvm_ff_to_e5m2x2_rn Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b), (CVT_e5m2x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; -def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu Float32Regs:$a, Float32Regs:$b), +def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b), (CVT_e5m2x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a), @@ -1809,19 +1809,19 @@ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a), class INT_FNS_MBO : NVPTXInst<(outs Int32Regs:$dst), ins, "fns.b32 \t$dst, $mask, $base, $offset;", - [(set Int32Regs:$dst, Operands )]>, + [(set i32:$dst, Operands)]>, Requires<[hasPTX<60>, hasSM<30>]>; def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), - (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; + (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>; def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), - (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; + (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>; def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), - (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; + (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>; def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), - (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; + (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>; def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), - (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; + (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>; def INT_FNS_iri : INT_FNS_MBO<(ins 
i32imm:$mask, Int32Regs:$base, i32imm:$offset), (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), @@ -2363,7 +2363,7 @@ class ATOM23_impl : NVPTXInst<(outs regclass:$result), ins, AsmStr, - [(set (regT regclass:$result), Operands)]>, + [(set regT:$result, Operands)]>, Requires; // Define instruction variants for all addressing modes. @@ -2374,26 +2374,26 @@ multiclass ATOM2P_impl; + (Intr i16:$src, regT:$b)>; def : ATOM23_impl; + (Intr i32:$src, regT:$b)>; def : ATOM23_impl; + (Intr i64:$src, regT:$b)>; } // tablegen can't infer argument types from Intrinsic (though it can // from Instruction) so we have to enforce specific type on // immediates via explicit cast to ImmTy. def : ATOM23_impl; + (Intr i16:$src, (ImmTy Imm:$b))>; def : ATOM23_impl; + (Intr i32:$src, (ImmTy Imm:$b))>; def : ATOM23_impl; + (Intr i64:$src, (ImmTy Imm:$b))>; } multiclass ATOM3P_impl; + (Intr i32:$src, regT:$b, regT:$c)>; def : ATOM23_impl; + (Intr i64:$src, regT:$b, regT:$c)>; } let AddedComplexity = 1 in { def : ATOM23_impl; + (Intr i32:$src, (ImmTy Imm:$b), regT:$c)>; def : ATOM23_impl; + (Intr i64:$src, (ImmTy Imm:$b), regT:$c)>; def : ATOM23_impl; + (Intr i32:$src, regT:$b, (ImmTy Imm:$c))>; def : ATOM23_impl; + (Intr i64:$src, regT:$b, (ImmTy Imm:$c))>; } def : ATOM23_impl; + (Intr i32:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; def : ATOM23_impl; + (Intr i64:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; } // Constructs intrinsic name and instruction asm strings. 
@@ -2795,48 +2795,48 @@ defm cvta_to_const : G_TO_NG<"const">; // nvvm.ptr.param.to.gen defm cvta_param : NG_TO_G<"param">; -def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src), +def : Pat<(int_nvvm_ptr_param_to_gen i32:$src), (cvta_param Int32Regs:$src)>; -def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src), +def : Pat<(int_nvvm_ptr_param_to_gen i64:$src), (cvta_param_64 Int64Regs:$src)>; // nvvm.ptr.gen.to.param -def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src), +def : Pat<(int_nvvm_ptr_gen_to_param i32:$src), (IMOV32rr Int32Regs:$src)>; -def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src), +def : Pat<(int_nvvm_ptr_gen_to_param i64:$src), (IMOV64rr Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), "mov.b16 \t$r, $s;", - [(set Int16Regs:$r, - (int_nvvm_move_i16 Int16Regs:$s))]>; + [(set i16:$r, + (int_nvvm_move_i16 i16:$s))]>; def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), "mov.b32 \t$r, $s;", - [(set Int32Regs:$r, - (int_nvvm_move_i32 Int32Regs:$s))]>; + [(set i32:$r, + (int_nvvm_move_i32 i32:$s))]>; def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), "mov.b64 \t$r, $s;", - [(set Int64Regs:$r, - (int_nvvm_move_i64 Int64Regs:$s))]>; + [(set i64:$r, + (int_nvvm_move_i64 i64:$s))]>; def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), "mov.f32 \t$r, $s;", - [(set Float32Regs:$r, - (int_nvvm_move_float Float32Regs:$s))]>; + [(set f32:$r, + (int_nvvm_move_float f32:$s))]>; def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), "mov.f64 \t$r, $s;", - [(set Float64Regs:$r, - (int_nvvm_move_double Float64Regs:$s))]>; + [(set f64:$r, + (int_nvvm_move_double f64:$s))]>; def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), "mov.u32 \t$r, $s;", - [(set Int32Regs:$r, - (int_nvvm_move_ptr Int32Regs:$s))]>; + [(set i32:$r, + (int_nvvm_move_ptr i32:$s))]>; def nvvm_move_ptr64 : NVPTXInst<(outs 
Int64Regs:$r), (ins Int64Regs:$s), "mov.u64 \t$r, $s;", - [(set Int64Regs:$r, - (int_nvvm_move_ptr Int64Regs:$s))]>; + [(set i64:$r, + (int_nvvm_move_ptr i64:$s))]>; // @TODO: Are these actually needed, or will we always just see symbols // copied to registers first? @@ -2860,16 +2860,16 @@ def texsurf_handles def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), "// llvm.nvvm.compiler.warn()", - [(int_nvvm_compiler_warn Int32Regs:$a)]>; + [(int_nvvm_compiler_warn i32:$a)]>; def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), "// llvm.nvvm.compiler.warn()", - [(int_nvvm_compiler_warn Int64Regs:$a)]>; + [(int_nvvm_compiler_warn i64:$a)]>; def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), "// llvm.nvvm.compiler.error()", - [(int_nvvm_compiler_error Int32Regs:$a)]>; + [(int_nvvm_compiler_error i32:$a)]>; def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), "// llvm.nvvm.compiler.error()", - [(int_nvvm_compiler_error Int64Regs:$a)]>; + [(int_nvvm_compiler_error i64:$a)]>; // isspacep @@ -2877,11 +2877,11 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), multiclass ISSPACEP Preds = []> { def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), "isspacep." # suffix # "\t$d, $a;", - [(set Int1Regs:$d, (Intr Int32Regs:$a))]>, + [(set i1:$d, (Intr i32:$a))]>, Requires; def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "isspacep." 
# suffix # "\t$d, $a;", - [(set Int1Regs:$d, (Intr Int64Regs:$a))]>, + [(set i1:$d, (Intr i64:$a))]>, Requires; } @@ -2932,7 +2932,7 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; -def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), +def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src), (V2I32toI64 (I64toI32H Int64Regs:$src), (I64toI32L Int64Regs:$src))> ; @@ -5039,22 +5039,22 @@ def TXQ_NUM_MIPMAP_LEVELS_I []>; } -def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), - (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), - (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_width Int64Regs:$a), - (TXQ_WIDTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_height Int64Regs:$a), - (TXQ_HEIGHT_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_depth Int64Regs:$a), - (TXQ_DEPTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), - (TXQ_ARRAY_SIZE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), - (TXQ_NUM_SAMPLES_R Int64Regs:$a)>; -def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), - (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_channel_order i64:$a), + (TXQ_CHANNEL_ORDER_R i64:$a)>; +def : Pat<(int_nvvm_txq_channel_data_type i64:$a), + (TXQ_CHANNEL_DATA_TYPE_R i64:$a)>; +def : Pat<(int_nvvm_txq_width i64:$a), + (TXQ_WIDTH_R i64:$a)>; +def : Pat<(int_nvvm_txq_height i64:$a), + (TXQ_HEIGHT_R i64:$a)>; +def : Pat<(int_nvvm_txq_depth i64:$a), + (TXQ_DEPTH_R i64:$a)>; +def : Pat<(int_nvvm_txq_array_size i64:$a), + (TXQ_ARRAY_SIZE_R i64:$a)>; +def : Pat<(int_nvvm_txq_num_samples i64:$a), + (TXQ_NUM_SAMPLES_R i64:$a)>; +def : Pat<(int_nvvm_txq_num_mipmap_levels i64:$a), + (TXQ_NUM_MIPMAP_LEVELS_R i64:$a)>; //----------------------------------- @@ -5112,17 +5112,17 @@ def SUQ_ARRAY_SIZE_I []>; } -def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), +def : 
Pat<(int_nvvm_suq_channel_order i64:$a), (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), +def : Pat<(int_nvvm_suq_channel_data_type i64:$a), (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_width Int64Regs:$a), +def : Pat<(int_nvvm_suq_width i64:$a), (SUQ_WIDTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_height Int64Regs:$a), +def : Pat<(int_nvvm_suq_height i64:$a), (SUQ_HEIGHT_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_depth Int64Regs:$a), +def : Pat<(int_nvvm_suq_depth i64:$a), (SUQ_DEPTH_R Int64Regs:$a)>; -def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), +def : Pat<(int_nvvm_suq_array_size i64:$a), (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; @@ -5132,15 +5132,15 @@ def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), def ISTYPEP_SAMPLER : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.samplerref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>; def ISTYPEP_SURFACE : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.surfref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>; def ISTYPEP_TEXTURE : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), "istypep.texref \t$d, $a;", - [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; + [(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>; //===- Surface Stores -----------------------------------------------------===// @@ -6931,13 +6931,13 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap class PTX_READ_SREG_R64 Preds=[]> : NVPTXInst<(outs Int64Regs:$d), (ins), !strconcat("mov.u64 \t$d, %", regname, ";"), - [(set Int64Regs:$d, (intop))]>, + [(set i64:$d, (intop))]>, Requires; class PTX_READ_SREG_R32 Preds=[]> : NVPTXInst<(outs Int32Regs:$d), (ins), !strconcat("mov.u32 \t$d, %", regname, ";"), - [(set Int32Regs:$d, (intop))]>, + [(set i32:$d, (intop))]>, Requires; multiclass PTX_READ_SREG_R32V4 
Preds=[]> { @@ -7019,7 +7019,7 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; // handle the constant. def INT_PTX_SREG_WARPSIZE : NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", - [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; + [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; // Helper class that represents a 'fragment' of an NVPTX *MMA instruction. // In addition to target-independent fields provided by WMMA_REGS, it adds @@ -7431,19 +7431,19 @@ foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in multiclass MAPA { def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b), "mapa" # suffix # ".u32\t$d, $a, $b;", - [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>, + [(set i32:$d, (Intr i32:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b), "mapa" # suffix # ".u32\t$d, $a, $b;", - [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>, + [(set i32:$d, (Intr i32:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b), "mapa" # suffix # ".u64\t$d, $a, $b;", - [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>, + [(set i64:$d, (Intr i64:$a, i32:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b), "mapa" # suffix # ".u64\t$d, $a, $b;", - [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>, + [(set i64:$d, (Intr i64:$a, imm:$b))]>, Requires<[hasSM<90>, hasPTX<78>]>; } @@ -7454,11 +7454,11 @@ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluste multiclass GETCTARANK { def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), "getctarank" # suffix # ".u32\t$d, $a;", - [(set Int32Regs:$d, (Intr Int32Regs:$a))]>, + [(set i32:$d, (Intr i32:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 
"getctarank" # suffix # ".u64\t$d, $a;", - [(set Int32Regs:$d, (Intr Int64Regs:$a))]>, + [(set i32:$d, (Intr i64:$a))]>, Requires<[hasSM<90>, hasPTX<78>]>; } @@ -7467,7 +7467,7 @@ defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getcta def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", - [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>, + [(set i1:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; // setmaxnreg inc/dec intrinsics From 7153a21916fa985dfba604c0edf04a8d3c44b389 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Dec 2024 14:14:03 -0800 Subject: [PATCH 35/35] [libc][docs] update sphinx requirement hashes (#120315) Link: #120274 --- llvm/docs/requirements-hashed.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/docs/requirements-hashed.txt b/llvm/docs/requirements-hashed.txt index 64cfc8fc79799..07e051ca4a8ba 100644 --- a/llvm/docs/requirements-hashed.txt +++ b/llvm/docs/requirements-hashed.txt @@ -314,6 +314,7 @@ sphinx==7.2.6 \ # recommonmark # sphinx-automodapi # sphinx-basic-ng + # sphinx-reredirects # sphinxcontrib-devhelp # sphinxcontrib-htmlhelp # sphinxcontrib-qthelp @@ -334,6 +335,10 @@ sphinx-markdown-tables==0.0.17 \ --hash=sha256:2bd0c30779653e4dd120300cbd9ca412c480738cc2241f6dea477a883f299e04 \ --hash=sha256:6bc6d3d400eaccfeebd288446bc08dd83083367c58b85d40fe6c12d77ef592f1 # via -r requirements.txt +sphinx-reredirects==0.1.2 \ + --hash=sha256:3a22161771aadd448bb608a4fe7277252182a337af53c18372b7104531d71489 \ + --hash=sha256:a0e7213304759b01edc22f032f1715a1c61176fc8f167164e7a52b9feec9ac64 + # via -r requirements.txt sphinxcontrib-applehelp==1.0.8 \ --hash=sha256:c40a4f96f3776c4393d933412053962fac2b84f4c99a7982ba42e09576a70619 \ --hash=sha256:cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4